diff --git a/docs/_posts/DevinTDHa/2023-09-20-image_captioning_vit_gpt2_en.md b/docs/_posts/DevinTDHa/2023-09-20-image_captioning_vit_gpt2_en.md new file mode 100644 index 00000000000000..f7573201263d99 --- /dev/null +++ b/docs/_posts/DevinTDHa/2023-09-20-image_captioning_vit_gpt2_en.md @@ -0,0 +1,125 @@ +--- +layout: model +title: Image Caption with VisionEncoderDecoder ViT GPT2 +author: John Snow Labs +name: image_captioning_vit_gpt2 +date: 2023-09-20 +tags: [en, vit, gpt2, image, captioning, open_source, tensorflow] +task: Image Captioning +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: VisionEncoderDecoderForImageCaptioning +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +This is an image captioning model using ViT to encode images and GPT2 to generate captions. Original model from https://huggingface.co/nlpconnect/vit-gpt2-image-captioning + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/image_captioning_vit_gpt2_en_5.1.2_3.0_1695215721202.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/image_captioning_vit_gpt2_en_5.1.2_3.0_1695215721202.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +from sparknlp.base import * +from sparknlp.annotator import * +from pyspark.ml import Pipeline +imageDF = spark.read \ + .format("image") \ + .option("dropInvalid", value = True) \ + .load("src/test/resources/image/") +imageAssembler = ImageAssembler() \ + .setInputCol("image") \ + .setOutputCol("image_assembler") +imageCaptioning = VisionEncoderDecoderForImageCaptioning \ + .pretrained() \ + .setBeamSize(2) \ + .setDoSample(False) \ + .setInputCols(["image_assembler"]) \ + .setOutputCol("caption") +pipeline = Pipeline().setStages([imageAssembler, imageCaptioning]) +pipelineDF = pipeline.fit(imageDF).transform(imageDF) +pipelineDF \ + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") \ + .show(truncate = False) +``` +```scala +import com.johnsnowlabs.nlp.annotator._ +import com.johnsnowlabs.nlp.ImageAssembler +import org.apache.spark.ml.Pipeline + +val imageDF = spark.read + .format("image") + .option("dropInvalid", value = true) + .load("src/test/resources/image/") + +val imageAssembler = new ImageAssembler() + .setInputCol("image") + .setOutputCol("image_assembler") + +val imageCaptioning = VisionEncoderDecoderForImageCaptioning + .pretrained() + .setBeamSize(2) + .setDoSample(false) + .setInputCols("image_assembler") + .setOutputCol("caption") + +val pipeline = new Pipeline().setStages(Array(imageAssembler, imageCaptioning)) +val pipelineDF = pipeline.fit(imageDF).transform(imageDF) + +pipelineDF + .selectExpr("reverse(split(image.origin, '/'))[0] as image_name", "caption.result") + .show(truncate = false) +``` +
+ +## Results + +```bash ++-----------------+---------------------------------------------------------+ +|image_name |result | ++-----------------+---------------------------------------------------------+ +|palace.JPEG |[a large room filled with furniture and a large window] | +|egyptian_cat.jpeg|[a cat laying on a couch next to another cat] | +|hippopotamus.JPEG|[a brown bear in a body of water] | +|hen.JPEG |[a flock of chickens standing next to each other] | +|ostrich.JPEG |[a large bird standing on top of a lush green field] | +|junco.JPEG |[a small bird standing on a wet ground] | +|bluetick.jpg |[a small dog standing on a wooden floor] | +|chihuahua.jpg |[a small brown dog wearing a blue sweater] | +|tractor.JPEG |[a man is standing in a field with a tractor] | +|ox.JPEG |[a large brown cow standing on top of a lush green field]| ++-----------------+---------------------------------------------------------+ +``` + +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|image_captioning_vit_gpt2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[image_assembler]| +|Output Labels:|[caption]| +|Language:|en| +|Size:|890.3 MB| diff --git a/docs/_posts/LIN-Yu-Ting/2023-09-18-AtgxRobertaBaseSquad2_en.md b/docs/_posts/LIN-Yu-Ting/2023-09-18-AtgxRobertaBaseSquad2_en.md new file mode 100644 index 00000000000000..09f07830994d02 --- /dev/null +++ b/docs/_posts/LIN-Yu-Ting/2023-09-18-AtgxRobertaBaseSquad2_en.md @@ -0,0 +1,61 @@ +--- +layout: model +title: Atgenomix Testing QA Model +author: LIN-Yu-Ting +name: AtgxRobertaBaseSquad2 +date: 2023-09-18 +tags: [en, open_source, tensorflow] +task: Question Answering +language: en +edition: Spark NLP 4.4.3 +spark_version: 3.4 +supported: false +engine: tensorflow +annotator: RoBertaForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Testing Question Answering model for Atgenomix usage 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/community.johnsnowlabs.com/LIN-Yu-Ting/AtgxRobertaBaseSquad2_en_4.4.3_3.4_1695000774804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://community.johnsnowlabs.com/LIN-Yu-Ting/AtgxRobertaBaseSquad2_en_4.4.3_3.4_1695000774804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +import sparknlp +spark = sparknlp.start() +``` + +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|AtgxRobertaBaseSquad2| +|Compatibility:|Spark NLP 4.4.3+| +|License:|Open Source| +|Edition:|Community| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|460.0 MB| +|Case sensitive:|true| +|Max sentence length:|512| \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_allnli_gronlp_base_dutch_cased_nl.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_allnli_gronlp_base_dutch_cased_nl.md new file mode 100644 index 00000000000000..fad7aaee3308db --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_allnli_gronlp_base_dutch_cased_nl.md @@ -0,0 +1,99 @@ +--- +layout: model +title: Dutch BertEmbeddings Base Cased model (from textgain) +author: John Snow Labs +name: bert_embeddings_allnli_gronlp_base_dutch_cased +date: 2023-09-22 +tags: [nl, open_source, bert_embeddings, onnx] +task: Embeddings +language: nl +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `allnli-GroNLP-bert-base-dutch-cased` is a Dutch model originally trained by `textgain`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_allnli_gronlp_base_dutch_cased_nl_5.1.0_3.0_1695368402996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_allnli_gronlp_base_dutch_cased_nl_5.1.0_3.0_1695368402996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +bert_loaded = BertEmbeddings.pretrained("bert_embeddings_allnli_gronlp_base_dutch_cased","nl") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded]) + +data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_allnli_gronlp_base_dutch_cased","nl") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded)) + +val data = Seq("I love Spark NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_allnli_gronlp_base_dutch_cased| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|nl| +|Size:|406.8 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/textgain/allnli-GroNLP-bert-base-dutch-cased +- https://www.SBERT.net +- https://www.SBERT.net +- https://www.SBERT.net +- https://seb.sbert.net?model_name=%7BMODEL_NAME%7D \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_base_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_base_en.md new file mode 100644 index 00000000000000..4ac379785202a2 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_base_en.md @@ -0,0 +1,135 @@ +--- +layout: model +title: English BertEmbeddings Base Cased model (from BAAI) +author: John Snow Labs +name: bert_embeddings_bge_base +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bge-base-en` is an English model originally trained by `BAAI`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_bge_base_en_5.1.0_3.0_1695368493416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_bge_base_en_5.1.0_3.0_1695368493416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bge_base","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded]) + +data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bge_base","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded)) + +val data = Seq("I love Spark NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_bge_base| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|259.0 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/BAAI/bge-base-en +- https://github.com/FlagOpen/FlagEmbedding +- https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md +- https://arxiv.org/pdf/2309.07597.pdf +- https://data.baai.ac.cn/details/BAAI-MTP +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- 
https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune#hard-negatives +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list +- https://www.SBERT.net +- https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md +- https://platform.openai.com/docs/guides/embeddings +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md +- https://platform.openai.com/docs/guides/embeddings/what-are-embeddings +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/ +- https://github.com/staoxiao/RetroMAE +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/pretrain +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker +- https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE +- https://paperswithcode.com/sota?task=Classification&dataset=MTEB+AmazonCounterfactualClassification+%28en%29 \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_large_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_large_en.md new file mode 100644 index 00000000000000..2de0442d077a18 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_large_en.md @@ -0,0 +1,135 @@ +--- +layout: model +title: English BertEmbeddings Large Cased model (from BAAI) +author: John Snow Labs +name: bert_embeddings_bge_large +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings 
+article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bge-large-en` is an English model originally trained by `BAAI`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_bge_large_en_5.1.0_3.0_1695368740777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_bge_large_en_5.1.0_3.0_1695368740777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bge_large","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded]) + +data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bge_large","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded)) + +val data = Seq("I love Spark NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_bge_large| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|795.1 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/BAAI/bge-large-en +- https://github.com/FlagOpen/FlagEmbedding +- https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md +- https://arxiv.org/pdf/2309.07597.pdf +- https://data.baai.ac.cn/details/BAAI-MTP +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- 
https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune#hard-negatives +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list +- https://www.SBERT.net +- https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md +- https://platform.openai.com/docs/guides/embeddings +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md +- https://platform.openai.com/docs/guides/embeddings/what-are-embeddings +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/ +- https://github.com/staoxiao/RetroMAE +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/pretrain +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker +- https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE +- https://paperswithcode.com/sota?task=Classification&dataset=MTEB+AmazonCounterfactualClassification+%28en%29 \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_small_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_small_en.md new file mode 100644 index 00000000000000..56a008b4d10b5f --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bge_small_en.md @@ -0,0 +1,135 @@ +--- +layout: model +title: English BertEmbeddings Small Cased model (from BAAI) +author: John Snow Labs +name: bert_embeddings_bge_small +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings 
+article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bge-small-en` is an English model originally trained by `BAAI`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_bge_small_en_5.1.0_3.0_1695368784401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_bge_small_en_5.1.0_3.0_1695368784401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bge_small","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded]) + +data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bge_small","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded)) + +val data = Seq("I love Spark NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_bge_small| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|79.9 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/BAAI/bge-small-en +- https://github.com/FlagOpen/FlagEmbedding +- https://github.com/FlagOpen/FlagEmbedding/blob/master/README_zh.md +- https://arxiv.org/pdf/2309.07597.pdf +- https://data.baai.ac.cn/details/BAAI-MTP +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/C_MTEB +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- 
https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune#hard-negatives +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list +- https://www.SBERT.net +- https://github.com/FlagOpen/FlagEmbedding/tree/master#model-list +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md +- https://platform.openai.com/docs/guides/embeddings +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/README.md +- https://platform.openai.com/docs/guides/embeddings/what-are-embeddings +- https://github.com/FlagOpen/FlagEmbedding/blob/master/C_MTEB/ +- https://github.com/staoxiao/RetroMAE +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/finetune +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/pretrain +- https://github.com/FlagOpen/FlagEmbedding/blob/master/FlagEmbedding/baai_general_embedding/README.md +- https://github.com/FlagOpen/FlagEmbedding/tree/master/examples/reranker +- https://github.com/FlagOpen/FlagEmbedding/tree/master/FlagEmbedding/reranker +- https://github.com/FlagOpen/FlagEmbedding/blob/master/LICENSE +- https://paperswithcode.com/sota?task=Classification&dataset=MTEB+AmazonCounterfactualClassification+%28en%29 \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_bi_encoder_msmarco_base_german_de.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bi_encoder_msmarco_base_german_de.md new file mode 100644 index 00000000000000..759af7155db979 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_bi_encoder_msmarco_base_german_de.md @@ -0,0 +1,114 @@ +--- +layout: model +title: German BertEmbeddings Base Cased model (from PM-AI) +author: John Snow Labs +name: bert_embeddings_bi_encoder_msmarco_base_german +date: 2023-09-22 +tags: [de, open_source, bert_embeddings, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.0 
+spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bi-encoder_msmarco_bert-base_german` is a German model originally trained by `PM-AI`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_bi_encoder_msmarco_base_german_de_5.1.0_3.0_1695368809437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_bi_encoder_msmarco_base_german_de_5.1.0_3.0_1695368809437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bi_encoder_msmarco_base_german","de") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded]) + +data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_bi_encoder_msmarco_base_german","de") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded)) + +val data = Seq("I love Spark NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_bi_encoder_msmarco_base_german| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|de| +|Size:|409.7 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/PM-AI/bi-encoder_msmarco_bert-base_german +- https://github.com/UKPLab/sentence-transformers +- https://microsoft.github.io/msmarco/#ranking +- https://arxiv.org/abs/2108.13897 +- https://openreview.net/forum?id=wCu6T5xFjeJ +- https://public.ukp.informatik.tu-darmstadt.de/thakur/BEIR/datasets/ +- https://github.com/beir-cellar/beir +- https://github.com/beir-cellar/beir/blob/main/examples/retrieval/training/train_msmarco_v3_margin_MSE.py +- https://sbert.net/datasets/msmarco-hard-negatives.jsonl.gz +- https://github.com/beir-cellar/beir/blob/main/examples/retrieval/training/train_msmarco_v3_margin_MSE.py%5D +- https://github.com/UKPLab/sentence-transformers/blob/master/examples/training/ms_marco/README.md +- https://github.com/beir-cellar/beir/blob/main/examples/retrieval/training/train_msmarco_v3_margin_MSE.py +- https://arxiv.org/abs/2104.12741 +- https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-similarity.html#bm25 +- https://en.th-wildau.de/ +- https://senseaition.com/ +- https://www.linkedin.com/in/herrphilipps +- https://efre.brandenburg.de/efre/de/ +- https://www.senseaition.com +- https://www.th-wildau.de \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_claif_base_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_claif_base_en.md new file mode 100644 index 00000000000000..04c43275456dc3 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_claif_base_en.md @@ -0,0 +1,99 @@ +--- +layout: model +title: English BertEmbeddings Base Cased model (from fnlp) +author: John Snow Labs +name: 
bert_embeddings_claif_base +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `claif-bert-base` is an English model originally trained by `fnlp`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_claif_base_en_5.1.0_3.0_1695368841856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_claif_base_en_5.1.0_3.0_1695368841856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "document" annotation.
+# DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split each document into tokens; BertEmbeddings consumes both columns.
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+bert_loaded = BertEmbeddings.pretrained("bert_embeddings_claif_base","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded])
+
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+// Needed for .toDS / .toDF when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "document" annotation.
+// DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// Split each document into tokens; BertEmbeddings consumes both columns.
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_claif_base","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+    .setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded))
+
+val data = Seq("I love Spark NLP").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_claif_base| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.2 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/fnlp/claif-bert-base +- https://www.SBERT.net +- https://www.SBERT.net +- https://www.SBERT.net +- https://seb.sbert.net?model_name=fnlp/claif-bert-base \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_claif_scaled_base_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_claif_scaled_base_en.md new file mode 100644 index 00000000000000..7e83bf56c15a09 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_claif_scaled_base_en.md @@ -0,0 +1,99 @@ +--- +layout: model +title: English BertEmbeddings Base Cased model (from fnlp) +author: John Snow Labs +name: bert_embeddings_claif_scaled_base +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `claif-scaled-bert-base` is an English model originally trained by `fnlp`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_claif_scaled_base_en_5.1.0_3.0_1695368874285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_claif_scaled_base_en_5.1.0_3.0_1695368874285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "document" annotation.
+# DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split each document into tokens; BertEmbeddings consumes both columns.
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+bert_loaded = BertEmbeddings.pretrained("bert_embeddings_claif_scaled_base","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded])
+
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+// Needed for .toDS / .toDF when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "document" annotation.
+// DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// Split each document into tokens; BertEmbeddings consumes both columns.
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_claif_scaled_base","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+    .setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded))
+
+val data = Seq("I love Spark NLP").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_claif_scaled_base| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.2 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/fnlp/claif-scaled-bert-base +- https://www.SBERT.net +- https://www.SBERT.net +- https://www.SBERT.net +- https://seb.sbert.net?model_name=fnlp/claif-scaled-bert-base \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_frpile_gpl_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_frpile_gpl_en.md new file mode 100644 index 00000000000000..89d0fa73f1ee50 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_frpile_gpl_en.md @@ -0,0 +1,99 @@ +--- +layout: model +title: English BertEmbeddings Cased model (from DragosGorduza) +author: John Snow Labs +name: bert_embeddings_frpile_gpl +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `FRPile_GPL` is an English model originally trained by `DragosGorduza`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_frpile_gpl_en_5.1.0_3.0_1695368293927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_frpile_gpl_en_5.1.0_3.0_1695368293927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "document" annotation.
+# DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split each document into tokens; BertEmbeddings consumes both columns.
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+bert_loaded = BertEmbeddings.pretrained("bert_embeddings_frpile_gpl","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded])
+
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+// Needed for .toDS / .toDF when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "document" annotation.
+// DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// Split each document into tokens; BertEmbeddings consumes both columns.
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_frpile_gpl","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+    .setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded))
+
+val data = Seq("I love Spark NLP").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_frpile_gpl| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|1.3 GB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/DragosGorduza/FRPile_GPL +- https://www.SBERT.net +- https://www.SBERT.net +- https://www.SBERT.net +- https://seb.sbert.net?model_name=DragosGorduza/FRPile_GPL \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_retromae_beir_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_retromae_beir_en.md new file mode 100644 index 00000000000000..1586fef180f6b0 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_retromae_beir_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English BertEmbeddings Cased model (from nthakur) +author: John Snow Labs +name: bert_embeddings_retromae_beir +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `RetroMAE_BEIR` is an English model originally trained by `nthakur`. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_retromae_beir_en_5.1.0_3.0_1695368339221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_retromae_beir_en_5.1.0_3.0_1695368339221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "document" annotation.
+# DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split each document into tokens; BertEmbeddings consumes both columns.
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+bert_loaded = BertEmbeddings.pretrained("bert_embeddings_retromae_beir","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded])
+
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+// Needed for .toDS / .toDF when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "document" annotation.
+// DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// Split each document into tokens; BertEmbeddings consumes both columns.
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_retromae_beir","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+    .setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded))
+
+val data = Seq("I love Spark NLP").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_retromae_beir| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.6 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/nthakur/RetroMAE_BEIR +- https://www.SBERT.net +- https://www.SBERT.net +- https://www.SBERT.net +- https://seb.sbert.net?model_name=nthakur/RetroMAE_BEIR +- https://github.com/staoxiao/RetroMAE/ \ No newline at end of file diff --git a/docs/_posts/SKocer/2023-09-22-bert_embeddings_retromae_msmarco_finetune_en.md b/docs/_posts/SKocer/2023-09-22-bert_embeddings_retromae_msmarco_finetune_en.md new file mode 100644 index 00000000000000..b303400648d346 --- /dev/null +++ b/docs/_posts/SKocer/2023-09-22-bert_embeddings_retromae_msmarco_finetune_en.md @@ -0,0 +1,100 @@ +--- +layout: model +title: English BertEmbeddings Cased model (from nthakur) +author: John Snow Labs +name: bert_embeddings_retromae_msmarco_finetune +date: 2023-09-22 +tags: [en, open_source, bert_embeddings, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.0 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `RetroMAE_MSMARCO_finetune` is an English model originally trained by `nthakur`. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embeddings_retromae_msmarco_finetune_en_5.1.0_3.0_1695368370969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embeddings_retromae_msmarco_finetune_en_5.1.0_3.0_1695368370969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "document" annotation.
+# DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+documentAssembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("document")
+
+# Split each document into tokens; BertEmbeddings consumes both columns.
+tokenizer = Tokenizer() \
+    .setInputCols("document") \
+    .setOutputCol("token")
+
+bert_loaded = BertEmbeddings.pretrained("bert_embeddings_retromae_msmarco_finetune","en") \
+    .setInputCols(["document", "token"]) \
+    .setOutputCol("embeddings") \
+    .setCaseSensitive(True)
+
+pipeline = Pipeline(stages=[documentAssembler, tokenizer, bert_loaded])
+
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+result = pipeline.fit(data).transform(data)
+```
+```scala
+// Needed for .toDS / .toDF when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "document" annotation.
+// DocumentAssembler exposes the singular setInputCol / setOutputCol setters.
+val documentAssembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("document")
+
+// Split each document into tokens; BertEmbeddings consumes both columns.
+val tokenizer = new Tokenizer()
+    .setInputCols("document")
+    .setOutputCol("token")
+
+val bert_loaded = BertEmbeddings.pretrained("bert_embeddings_retromae_msmarco_finetune","en")
+    .setInputCols(Array("document", "token"))
+    .setOutputCol("embeddings")
+    .setCaseSensitive(true)
+
+val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, bert_loaded))
+
+val data = Seq("I love Spark NLP").toDS.toDF("text")
+
+val result = pipeline.fit(data).transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embeddings_retromae_msmarco_finetune| +|Compatibility:|Spark NLP 5.1.0+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document, token]| +|Output Labels:|[bert]| +|Language:|en| +|Size:|407.7 MB| +|Case sensitive:|true| + +## References + +- https://huggingface.co/nthakur/RetroMAE_MSMARCO_finetune +- https://www.SBERT.net +- https://www.SBERT.net +- https://www.SBERT.net +- https://seb.sbert.net?model_name=nthakur/RetroMAE_MSMARCO_finetune +- https://github.com/staoxiao/RetroMAE/ \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-20split_dataset_en.md b/docs/_posts/ahmedlone127/2023-09-12-20split_dataset_en.md new file mode 100644 index 00000000000000..780390afc444b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-20split_dataset_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 20split_dataset BertEmbeddings from Billwzl +author: John Snow Labs +name: 20split_dataset +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `20split_dataset` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20split_dataset_en_5.1.1_3.0_1694558868282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20split_dataset_en_5.1.1_3.0_1694558868282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "documents" annotation.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("20split_dataset","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# Sample input; replace with any DataFrame that has a "text" column.
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Needed for .toDF on a Seq when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "documents" annotation
+// (the name the downstream stages read from).
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("20split_dataset", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// Sample input; replace with any DataFrame that has a "text" column.
+val data = Seq("I love Spark NLP").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20split_dataset| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Billwzl/20split_dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abena_base_akuapem_twi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-abena_base_akuapem_twi_cased_en.md new file mode 100644 index 00000000000000..3b52b9a141e7c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abena_base_akuapem_twi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abena_base_akuapem_twi_cased BertEmbeddings from Ghana-NLP +author: John Snow Labs +name: abena_base_akuapem_twi_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abena_base_akuapem_twi_cased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abena_base_akuapem_twi_cased_en_5.1.1_3.0_1694558470329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abena_base_akuapem_twi_cased_en_5.1.1_3.0_1694558470329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "documents" annotation.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("abena_base_akuapem_twi_cased","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# Sample input; replace with any DataFrame that has a "text" column.
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Needed for .toDF on a Seq when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "documents" annotation
+// (the name the downstream stages read from).
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("abena_base_akuapem_twi_cased", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// Sample input; replace with any DataFrame that has a "text" column.
+val data = Seq("I love Spark NLP").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abena_base_akuapem_twi_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/Ghana-NLP/abena-base-akuapem-twi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abena_base_asante_twi_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-abena_base_asante_twi_uncased_en.md new file mode 100644 index 00000000000000..f196600033a6cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abena_base_asante_twi_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abena_base_asante_twi_uncased BertEmbeddings from Ghana-NLP +author: John Snow Labs +name: abena_base_asante_twi_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abena_base_asante_twi_uncased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abena_base_asante_twi_uncased_en_5.1.1_3.0_1694558682719.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abena_base_asante_twi_uncased_en_5.1.1_3.0_1694558682719.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "documents" annotation.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("abena_base_asante_twi_uncased","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# Sample input; replace with any DataFrame that has a "text" column.
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Needed for .toDF on a Seq when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "documents" annotation
+// (the name the downstream stages read from).
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("abena_base_asante_twi_uncased", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// Sample input; replace with any DataFrame that has a "text" column.
+val data = Seq("I love Spark NLP").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abena_base_asante_twi_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/Ghana-NLP/abena-base-asante-twi-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_query_pubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_query_pubmed_en.md new file mode 100644 index 00000000000000..934d2783451faf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_query_pubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abstract_sim_query_pubmed BertEmbeddings from biu-nlp +author: John Snow Labs +name: abstract_sim_query_pubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abstract_sim_query_pubmed` is an English model originally trained by biu-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abstract_sim_query_pubmed_en_5.1.1_3.0_1694561530585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abstract_sim_query_pubmed_en_5.1.1_3.0_1694561530585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "documents" annotation.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("abstract_sim_query_pubmed","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# Sample input; replace with any DataFrame that has a "text" column.
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Needed for .toDF on a Seq when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "documents" annotation
+// (the name the downstream stages read from).
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("abstract_sim_query_pubmed", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// Sample input; replace with any DataFrame that has a "text" column.
+val data = Seq("I love Spark NLP").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abstract_sim_query_pubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.2 MB| + +## References + +https://huggingface.co/biu-nlp/abstract-sim-query-pubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_sentence_pubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_sentence_pubmed_en.md new file mode 100644 index 00000000000000..67b65af41b7b7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-abstract_sim_sentence_pubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English abstract_sim_sentence_pubmed BertEmbeddings from biu-nlp +author: John Snow Labs +name: abstract_sim_sentence_pubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `abstract_sim_sentence_pubmed` is an English model originally trained by biu-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/abstract_sim_sentence_pubmed_en_5.1.1_3.0_1694561658345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/abstract_sim_sentence_pubmed_en_5.1.1_3.0_1694561658345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "documents" annotation.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("abstract_sim_sentence_pubmed","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# Sample input; replace with any DataFrame that has a "text" column.
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Needed for .toDF on a Seq when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "documents" annotation
+// (the name the downstream stages read from).
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("abstract_sim_sentence_pubmed", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// Sample input; replace with any DataFrame that has a "text" column.
+val data = Seq("I love Spark NLP").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|abstract_sim_sentence_pubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/biu-nlp/abstract-sim-sentence-pubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ai12_mackei_en.md b/docs/_posts/ahmedlone127/2023-09-12-ai12_mackei_en.md new file mode 100644 index 00000000000000..dd2be1f5865cf6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ai12_mackei_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ai12_mackei BertEmbeddings from mackei +author: John Snow Labs +name: ai12_mackei +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai12_mackei` is an English model originally trained by mackei. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai12_mackei_en_5.1.1_3.0_1694551191898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai12_mackei_en_5.1.1_3.0_1694551191898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Wrap the raw "text" column as a "documents" annotation.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("ai12_mackei","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+# Sample input; replace with any DataFrame that has a "text" column.
+data = spark.createDataFrame([["I love Spark NLP"]]).toDF("text")
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Needed for .toDF on a Seq when not running inside spark-shell.
+import spark.implicits._
+
+// Wrap the raw "text" column as a "documents" annotation
+// (the name the downstream stages read from).
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols("documents")
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("ai12_mackei", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+// Sample input; replace with any DataFrame that has a "text" column.
+val data = Seq("I love Spark NLP").toDF("text")
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai12_mackei| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mackei/ai12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert6_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert6_en.md new file mode 100644 index 00000000000000..410b1b7c629140 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert6 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert6 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert6` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert6_en_5.1.1_3.0_1694557526003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert6_en_5.1.1_3.0_1694557526003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("akeylegalbert6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_en.md new file mode 100644 index 00000000000000..787d76eff64468 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert_en_5.1.1_3.0_1694557063708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert_en_5.1.1_3.0_1694557063708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("akeylegalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_14epoch_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_14epoch_en.md new file mode 100644 index 00000000000000..6887e299659818 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_14epoch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert_inscotus_and_ledgar_14epoch BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert_inscotus_and_ledgar_14epoch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert_inscotus_and_ledgar_14epoch` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_14epoch_en_5.1.1_3.0_1694554005394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_14epoch_en_5.1.1_3.0_1694554005394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("akeylegalbert_inscotus_and_ledgar_14epoch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert_inscotus_and_ledgar_14epoch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert_inscotus_and_ledgar_14epoch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert_inScotus_and_Ledgar_14epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_en.md b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_en.md new file mode 100644 index 00000000000000..9310c0c7819043 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-akeylegalbert_inscotus_and_ledgar_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akeylegalbert_inscotus_and_ledgar BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: akeylegalbert_inscotus_and_ledgar +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `akeylegalbert_inscotus_and_ledgar` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_en_5.1.1_3.0_1694553675259.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akeylegalbert_inscotus_and_ledgar_en_5.1.1_3.0_1694553675259.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("akeylegalbert_inscotus_and_ledgar","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("akeylegalbert_inscotus_and_ledgar", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akeylegalbert_inscotus_and_ledgar| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AkeyLegalBert_inScotus_and_Ledgar \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-algarlegalbert1_large_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-12-algarlegalbert1_large_arabertv2_en.md new file mode 100644 index 00000000000000..d1a780f6d78143 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-algarlegalbert1_large_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegalbert1_large_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegalbert1_large_arabertv2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegalbert1_large_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegalbert1_large_arabertv2_en_5.1.1_3.0_1694555945058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegalbert1_large_arabertv2_en_5.1.1_3.0_1694555945058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("algarlegalbert1_large_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("algarlegalbert1_large_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegalbert1_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegalBert1-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabert256_flickr8k_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabert256_flickr8k_en.md new file mode 100644 index 00000000000000..e38d9b909ca658 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabert256_flickr8k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabert256_flickr8k BertEmbeddings from jontooy +author: John Snow Labs +name: arabert256_flickr8k +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert256_flickr8k` is an English model originally trained by jontooy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert256_flickr8k_en_5.1.1_3.0_1694510056959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert256_flickr8k_en_5.1.1_3.0_1694510056959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabert256_flickr8k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabert256_flickr8k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert256_flickr8k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/jontooy/AraBERT256-Flickr8k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabert32_coco_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabert32_coco_en.md new file mode 100644 index 00000000000000..c953572c2f84fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabert32_coco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabert32_coco BertEmbeddings from jontooy +author: John Snow Labs +name: arabert32_coco +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert32_coco` is an English model originally trained by jontooy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert32_coco_en_5.1.1_3.0_1694509854276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert32_coco_en_5.1.1_3.0_1694509854276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabert32_coco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabert32_coco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert32_coco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/jontooy/AraBERT32-COCO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabert_c19_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabert_c19_ar.md new file mode 100644 index 00000000000000..3cff9a535af3ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabert_c19_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabert_c19 BertEmbeddings from moha +author: John Snow Labs +name: arabert_c19 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert_c19` is an Arabic model originally trained by moha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert_c19_ar_5.1.1_3.0_1694554366327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert_c19_ar_5.1.1_3.0_1694554366327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabert_c19","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabert_c19", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabert_c19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/moha/arabert_c19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v1_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v1_ar.md new file mode 100644 index 00000000000000..adf89169ecb019 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v1_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v1 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v1 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v1` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v1_ar_5.1.1_3.0_1694509594398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v1_ar_5.1.1_3.0_1694509594398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v1","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v1", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.8 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v2_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v2_ar.md new file mode 100644 index 00000000000000..3d49800c8ab1f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v2_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v2 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v2 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v2` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v2_ar_5.1.1_3.0_1694509720231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v2_ar_5.1.1_3.0_1694509720231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v2","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v2", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.8 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v3_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v3_ar.md new file mode 100644 index 00000000000000..50e019c20dcb7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v3_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v3 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v3 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v3` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v3_ar_5.1.1_3.0_1694509862023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v3_ar_5.1.1_3.0_1694509862023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v3","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v3", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.8 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v4_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v4_ar.md new file mode 100644 index 00000000000000..2fa8ff3e650214 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v4_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v4 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v4 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v4` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v4_ar_5.1.1_3.0_1694510028864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v4_ar_5.1.1_3.0_1694510028864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# data: a Spark DataFrame with a string column named "text". + +# Wrap the raw text into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings needs token annotations in addition to documents. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v4","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// data: a Spark DataFrame with a string column named "text". + +// Wrap the raw text into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings needs token annotations in addition to documents. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v4", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v5_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v5_ar.md new file mode 100644 index 00000000000000..4525a6a58b56fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v5_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v5 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v5 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v5` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v5_ar_5.1.1_3.0_1694510170817.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v5_ar_5.1.1_3.0_1694510170817.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v5","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v5", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v6_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v6_ar.md new file mode 100644 index 00000000000000..737813848f622d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v6_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabertmo_base_v6 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v6 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v6` is an Arabic model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v6_ar_5.1.1_3.0_1694510331239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v6_ar_5.1.1_3.0_1694510331239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v6","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v6", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v7_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v7_en.md new file mode 100644 index 00000000000000..76058d595f77cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabertmo_base_v7 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v7 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v7` is an English model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v7_en_5.1.1_3.0_1694510465368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v7_en_5.1.1_3.0_1694510465368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v8_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v8_en.md new file mode 100644 index 00000000000000..71772712dcccf2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabertmo_base_v8 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v8 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v8` is an English model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v8_en_5.1.1_3.0_1694510601521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v8_en_5.1.1_3.0_1694510601521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v8| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v9_en.md b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v9_en.md new file mode 100644 index 00000000000000..56b5aea49b96a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabertmo_base_v9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English arabertmo_base_v9 BertEmbeddings from Ebtihal +author: John Snow Labs +name: arabertmo_base_v9 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v9` is an English model originally trained by Ebtihal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v9_en_5.1.1_3.0_1694510724445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v9_en_5.1.1_3.0_1694510724445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("arabertmo_base_v9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabertmo_base_v9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabertmo_base_v9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/Ebtihal/AraBertMo_base_V9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-arabic_quran_nahj_sahife_ar.md b/docs/_posts/ahmedlone127/2023-09-12-arabic_quran_nahj_sahife_ar.md new file mode 100644 index 00000000000000..f0a31f7ef38fa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-arabic_quran_nahj_sahife_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic arabic_quran_nahj_sahife BertEmbeddings from pourmand1376 +author: John Snow Labs +name: arabic_quran_nahj_sahife +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabic_quran_nahj_sahife` is an Arabic model originally trained by pourmand1376. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabic_quran_nahj_sahife_ar_5.1.1_3.0_1694547794347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabic_quran_nahj_sahife_ar_5.1.1_3.0_1694547794347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("arabic_quran_nahj_sahife","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("arabic_quran_nahj_sahife", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|arabic_quran_nahj_sahife| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|412.0 MB| + +## References + +https://huggingface.co/pourmand1376/arabic-quran-nahj-sahife \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-aristoberto_en.md b/docs/_posts/ahmedlone127/2023-09-12-aristoberto_en.md new file mode 100644 index 00000000000000..10d30d7d6fd037 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-aristoberto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English aristoberto BertEmbeddings from Jacobo +author: John Snow Labs +name: aristoberto +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `aristoberto` is an English model originally trained by Jacobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aristoberto_en_5.1.1_3.0_1694561096224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aristoberto_en_5.1.1_3.0_1694561096224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("aristoberto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("aristoberto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aristoberto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.1 MB| + +## References + +https://huggingface.co/Jacobo/aristoBERTo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-astrobert_en.md b/docs/_posts/ahmedlone127/2023-09-12-astrobert_en.md new file mode 100644 index 00000000000000..ce4bf012e03b87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-astrobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English astrobert BertEmbeddings from adsabs +author: John Snow Labs +name: astrobert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `astrobert` is an English model originally trained by adsabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/astrobert_en_5.1.1_3.0_1694554976289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/astrobert_en_5.1.1_3.0_1694554976289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("astrobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("astrobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|astrobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/adsabs/astroBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-baby_model_en.md b/docs/_posts/ahmedlone127/2023-09-12-baby_model_en.md new file mode 100644 index 00000000000000..10d7b3bacb6524 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-baby_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English baby_model BertEmbeddings from patNike +author: John Snow Labs +name: baby_model +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `baby_model` is an English model originally trained by patNike. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/baby_model_en_5.1.1_3.0_1694562898992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/baby_model_en_5.1.1_3.0_1694562898992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("baby_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("baby_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|baby_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/patNike/baby_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-berel_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-berel_base_en.md new file mode 100644 index 00000000000000..8560c91596e653 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-berel_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English berel_base BertEmbeddings from t4-project +author: John Snow Labs +name: berel_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berel_base` is an English model originally trained by t4-project. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_base_en_5.1.1_3.0_1694559343111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_base_en_5.1.1_3.0_1694559343111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("berel_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("berel_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.1 MB| + +## References + +https://huggingface.co/t4-project/BEREL-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1850_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1850_en.md new file mode 100644 index 00000000000000..520ec650e3ca92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1850_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1760_1850 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1760_1850 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1760_1850` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1760_1850_en_5.1.1_3.0_1694561341595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1760_1850_en_5.1.1_3.0_1694561341595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1760_1850","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1760_1850", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1760_1850| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1760_1850 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1900_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1900_en.md new file mode 100644 index 00000000000000..452403dad636d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1760_1900_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1760_1900 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1760_1900 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1760_1900` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1760_1900_en_5.1.1_3.0_1694561483109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1760_1900_en_5.1.1_3.0_1694561483109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1760_1900","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1760_1900", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1760_1900| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1760_1900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1850_1875_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1850_1875_en.md new file mode 100644 index 00000000000000..82f7734b491242 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1850_1875_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1850_1875 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1850_1875 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1850_1875` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1850_1875_en_5.1.1_3.0_1694561623490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1850_1875_en_5.1.1_3.0_1694561623490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1850_1875","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1850_1875", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1850_1875| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1850_1875 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1875_1890_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1875_1890_en.md new file mode 100644 index 00000000000000..f4f5507f9b6045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1875_1890_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1875_1890 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1875_1890 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1875_1890` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1875_1890_en_5.1.1_3.0_1694561751322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1875_1890_en_5.1.1_3.0_1694561751322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1875_1890","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1875_1890", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1875_1890| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1875_1890 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_1890_1900_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_1890_1900_en.md new file mode 100644 index 00000000000000..9aab57cf704483 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_1890_1900_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_1890_1900 BertEmbeddings from Livingwithmachines +author: John Snow Labs +name: bert_1890_1900 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_1890_1900` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_1890_1900_en_5.1.1_3.0_1694561889659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_1890_1900_en_5.1.1_3.0_1694561889659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_1890_1900","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_1890_1900", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_1890_1900| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Livingwithmachines/bert_1890_1900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_ancient_chinese_zh.md b/docs/_posts/ahmedlone127/2023-09-12-bert_ancient_chinese_zh.md new file mode 100644 index 00000000000000..3bfe0f08bfacef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_ancient_chinese_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese bert_ancient_chinese BertEmbeddings from Jihuai +author: John Snow Labs +name: bert_ancient_chinese +date: 2023-09-12 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ancient_chinese` is a Chinese model originally trained by Jihuai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ancient_chinese_zh_5.1.1_3.0_1694547641835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ancient_chinese_zh_5.1.1_3.0_1694547641835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ancient_chinese","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ancient_chinese", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ancient_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|430.5 MB| + +## References + +https://huggingface.co/Jihuai/bert-ancient-chinese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_10lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_10lang_cased_xx.md new file mode 100644 index 00000000000000..630eef5ec4eeb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_10lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_10lang_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_10lang_cased +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_10lang_cased` is a Multilingual model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_10lang_cased_xx_5.1.1_3.0_1694549523236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_10lang_cased_xx_5.1.1_3.0_1694549523236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_10lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_10lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_10lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|514.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-10lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_15lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_15lang_cased_xx.md new file mode 100644 index 00000000000000..63be7c86770505 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_15lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_15lang_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_15lang_cased +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_15lang_cased` is a Multilingual model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_15lang_cased_xx_5.1.1_3.0_1694549707123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_15lang_cased_xx_5.1.1_3.0_1694549707123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_15lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_15lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_15lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|526.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-15lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_25lang_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_25lang_cased_en.md new file mode 100644 index 00000000000000..c84718c61a1780 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_25lang_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_25lang_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_25lang_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_25lang_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_25lang_cased_en_5.1.1_3.0_1694549898376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_25lang_cased_en_5.1.1_3.0_1694549898376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_25lang_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_25lang_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_25lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|565.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-25lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_divehi_v2_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_divehi_v2_en.md new file mode 100644 index 00000000000000..5f0da46f695097 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_divehi_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_divehi_v2 BertEmbeddings from mahfooz +author: John Snow Labs +name: bert_base_cased_divehi_v2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_divehi_v2` is an English model originally trained by mahfooz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_divehi_v2_en_5.1.1_3.0_1694549387188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_divehi_v2_en_5.1.1_3.0_1694549387188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_divehi_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_divehi_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_divehi_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/mahfooz/bert-base-cased-dv-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_finetuned_en.md new file mode 100644 index 00000000000000..612e93eb431eda --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned BertEmbeddings from GusNicho +author: John Snow Labs +name: bert_base_cased_finetuned +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned` is an English model originally trained by GusNicho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_en_5.1.1_3.0_1694559327612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_en_5.1.1_3.0_1694559327612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/GusNicho/bert-base-cased-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_portuguese_ccorpus_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_portuguese_ccorpus_en.md new file mode 100644 index 00000000000000..4683925cb34f5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_portuguese_ccorpus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_portuguese_ccorpus BertEmbeddings from rosimeirecosta +author: John Snow Labs +name: bert_base_cased_portuguese_ccorpus +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_ccorpus` is an English model originally trained by rosimeirecosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_ccorpus_en_5.1.1_3.0_1694553390707.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_ccorpus_en_5.1.1_3.0_1694553390707.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_ccorpus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_ccorpus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_ccorpus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/rosimeirecosta/bert-base-cased-pt-ccorpus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_test_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_test_en.md new file mode 100644 index 00000000000000..6257d2040102f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_cased_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_test BertEmbeddings from chenyu313 +author: John Snow Labs +name: bert_base_cased_test +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_test` is an English model originally trained by chenyu313. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_test_en_5.1.1_3.0_1694554576959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_test_en_5.1.1_3.0_1694554576959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/chenyu313/bert-base-cased-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_germanic_languages_nl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_germanic_languages_nl.md new file mode 100644 index 00000000000000..85066ea191d1c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_germanic_languages_nl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dutch, Flemish bert_base_dutch_cased_finetuned_germanic_languages BertEmbeddings from GeniusVoice +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_germanic_languages +date: 2023-09-12 +tags: [bert, nl, open_source, fill_mask, onnx] +task: Embeddings +language: nl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_finetuned_germanic_languages` is a Dutch, Flemish model originally trained by GeniusVoice.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_germanic_languages_nl_5.1.1_3.0_1694549349357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_germanic_languages_nl_5.1.1_3.0_1694549349357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_finetuned_germanic_languages","nl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_finetuned_germanic_languages", "nl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_germanic_languages| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|nl| +|Size:|406.8 MB| + +## References + +https://huggingface.co/GeniusVoice/bert-base-dutch-cased-finetuned-gem \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_mark_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_mark_en.md new file mode 100644 index 00000000000000..1ea2c73664ac48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_finetuned_mark_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_dutch_cased_finetuned_mark BertEmbeddings from markverschuren +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_mark +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_finetuned_mark` is an English model originally trained by markverschuren. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_mark_en_5.1.1_3.0_1694551719944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_mark_en_5.1.1_3.0_1694551719944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_finetuned_mark","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_finetuned_mark", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_mark| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/markverschuren/bert-base-dutch-cased-finetuned-mark \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_gronlp_nl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_gronlp_nl.md new file mode 100644 index 00000000000000..b8755890f4c0e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_dutch_cased_gronlp_nl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dutch, Flemish bert_base_dutch_cased_gronlp BertEmbeddings from GroNLP +author: John Snow Labs +name: bert_base_dutch_cased_gronlp +date: 2023-09-12 +tags: [bert, nl, open_source, fill_mask, onnx] +task: Embeddings +language: nl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_dutch_cased_gronlp` is a Dutch, Flemish model originally trained by GroNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_gronlp_nl_5.1.1_3.0_1694559021052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_gronlp_nl_5.1.1_3.0_1694559021052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_gronlp","nl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_gronlp", "nl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_gronlp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|nl| +|Size:|406.8 MB| + +## References + +https://huggingface.co/GroNLP/bert-base-dutch-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_arabic_cased_en.md new file mode 100644 index 00000000000000..0da02178e7a8f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_arabic_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_arabic_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_arabic_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_arabic_cased_en_5.1.1_3.0_1694550500308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_arabic_cased_en_5.1.1_3.0_1694550500308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_arabic_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_bulgarian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_bulgarian_cased_en.md new file mode 100644 index 00000000000000..498ecc8acdce6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_bulgarian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_bulgarian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_bulgarian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_bulgarian_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_bulgarian_cased_en_5.1.1_3.0_1694550674451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_bulgarian_cased_en_5.1.1_3.0_1694550674451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_bulgarian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_bulgarian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_bulgarian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-bg-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_cased_en.md new file mode 100644 index 00000000000000..109b67366824be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_chinese_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_cased_en_5.1.1_3.0_1694556350417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_cased_en_5.1.1_3.0_1694556350417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_hindi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_hindi_cased_en.md new file mode 100644 index 00000000000000..5d9a398859b97b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_chinese_hindi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_chinese_hindi_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_chinese_hindi_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_chinese_hindi_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_hindi_cased_en_5.1.1_3.0_1694556485723.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_chinese_hindi_cased_en_5.1.1_3.0_1694556485723.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_chinese_hindi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_chinese_hindi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_chinese_hindi_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|426.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-zh-hi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_danish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_danish_cased_en.md new file mode 100644 index 00000000000000..73ec5ce0f00e26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_danish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_danish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_danish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_danish_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_danish_cased_en_5.1.1_3.0_1694550943639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_danish_cased_en_5.1.1_3.0_1694550943639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_danish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_danish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_danish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|414.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-da-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_dutch_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_dutch_cased_en.md new file mode 100644 index 00000000000000..b37e0aeaf11003 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_dutch_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_dutch_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_dutch_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_dutch_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_dutch_cased_en_5.1.1_3.0_1694554396036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_dutch_cased_en_5.1.1_3.0_1694554396036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_dutch_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_dutch_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_dutch_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|415.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-nl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_arabic_cased_en.md new file mode 100644 index 00000000000000..bc901d17436626 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_arabic_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_arabic_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_french_arabic_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_arabic_cased_en_5.1.1_3.0_1694551872020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_arabic_cased_en_5.1.1_3.0_1694551872020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_arabic_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|426.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_cased_en.md new file mode 100644 index 00000000000000..505fc85204dfd4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_french_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_cased_en_5.1.1_3.0_1694552022056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_cased_en_5.1.1_3.0_1694552022056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|416.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_cased_en.md new file mode 100644 index 00000000000000..90a546f334b71d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_english_french_chinese_cased` is a English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_cased_en_5.1.1_3.0_1694553556344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_cased_en_5.1.1_3.0_1694553556344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|435.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_japanese_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_japanese_vietnamese_cased_en.md new file mode 100644 index 00000000000000..b30cb7c2282886 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_chinese_japanese_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_chinese_japanese_vietnamese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_chinese_japanese_vietnamese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_chinese_japanese_vietnamese_cased` is an English model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_japanese_vietnamese_cased_en_5.1.1_3.0_1694553703735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_chinese_japanese_vietnamese_cased_en_5.1.1_3.0_1694553703735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_chinese_japanese_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_chinese_japanese_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_chinese_japanese_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|446.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-zh-ja-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_danish_japanese_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_danish_japanese_vietnamese_cased_en.md new file mode 100644 index 00000000000000..c9f7fefe5c89ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_danish_japanese_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_danish_japanese_vietnamese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_danish_japanese_vietnamese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_danish_japanese_vietnamese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_danish_japanese_vietnamese_cased_en_5.1.1_3.0_1694552158682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_danish_japanese_vietnamese_cased_en_5.1.1_3.0_1694552158682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_danish_japanese_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_danish_japanese_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_danish_japanese_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|446.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-da-ja-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_dutch_russian_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_dutch_russian_arabic_cased_en.md new file mode 100644 index 00000000000000..d3d6b817004f00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_dutch_russian_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_dutch_russian_arabic_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_dutch_russian_arabic_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_dutch_russian_arabic_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_dutch_russian_arabic_cased_en_5.1.1_3.0_1694553406628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_dutch_russian_arabic_cased_en_5.1.1_3.0_1694553406628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_dutch_russian_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_dutch_russian_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_dutch_russian_arabic_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|461.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-nl-ru-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_cased_en.md new file mode 100644 index 00000000000000..9b87ae583b200f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_german_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_german_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_german_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_cased_en_5.1.1_3.0_1694552319403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_cased_en_5.1.1_3.0_1694552319403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_german_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_german_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_german_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|432.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-de-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_norwegian_danish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_norwegian_danish_cased_en.md new file mode 100644 index 00000000000000..4673eebea8eaab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_german_norwegian_danish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_german_norwegian_danish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_german_norwegian_danish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_german_norwegian_danish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_norwegian_danish_cased_en_5.1.1_3.0_1694552478365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_german_norwegian_danish_cased_en_5.1.1_3.0_1694552478365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_german_norwegian_danish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_german_norwegian_danish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_german_norwegian_danish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|440.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-de-no-da-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_italian_cased_en.md new file mode 100644 index 00000000000000..f2fc131d2f6b9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_italian_cased_en_5.1.1_3.0_1694553098457.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_italian_cased_en_5.1.1_3.0_1694553098457.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|428.1 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_lithuanian_norwegian_polish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_lithuanian_norwegian_polish_cased_en.md new file mode 100644 index 00000000000000..b95580bffaeb7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_lithuanian_norwegian_polish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_lithuanian_norwegian_polish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_lithuanian_norwegian_polish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_lithuanian_norwegian_polish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_lithuanian_norwegian_polish_cased_en_5.1.1_3.0_1694553253099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_lithuanian_norwegian_polish_cased_en_5.1.1_3.0_1694553253099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_lithuanian_norwegian_polish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_lithuanian_norwegian_polish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_lithuanian_norwegian_polish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-lt-no-pl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_cased_en.md new file mode 100644 index 00000000000000..09ad6305d4fe16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_spanish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_spanish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_spanish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_cased_en_5.1.1_3.0_1694552634510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_cased_en_5.1.1_3.0_1694552634510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_spanish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_spanish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_spanish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|433.1 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-es-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_german_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_german_chinese_cased_en.md new file mode 100644 index 00000000000000..164b1bfa67175d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_german_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_spanish_german_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_spanish_german_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_spanish_german_chinese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_german_chinese_cased_en_5.1.1_3.0_1694552792508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_german_chinese_cased_en_5.1.1_3.0_1694552792508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_spanish_german_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_spanish_german_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_spanish_german_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|466.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-es-de-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_portuguese_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_portuguese_italian_cased_en.md new file mode 100644 index 00000000000000..4de6c38b51eb29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_french_spanish_portuguese_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_french_spanish_portuguese_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_french_spanish_portuguese_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_french_spanish_portuguese_italian_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_portuguese_italian_cased_en_5.1.1_3.0_1694552951990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_french_spanish_portuguese_italian_cased_en_5.1.1_3.0_1694552951990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_french_spanish_portuguese_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_french_spanish_portuguese_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_french_spanish_portuguese_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|444.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-fr-es-pt-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_german_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_german_cased_en.md new file mode 100644 index 00000000000000..ab954c6eec1a23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_german_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_german_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_german_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_german_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_german_cased_en_5.1.1_3.0_1694551087783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_german_cased_en_5.1.1_3.0_1694551087783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_german_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_german_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_german_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-de-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_hindi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_hindi_cased_en.md new file mode 100644 index 00000000000000..dae53a7b0be28f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_hindi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_hindi_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_hindi_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_hindi_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_hindi_cased_en_5.1.1_3.0_1694553855877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_hindi_cased_en_5.1.1_3.0_1694553855877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_hindi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_hindi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_hindi_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-hi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_italian_cased_en.md new file mode 100644 index 00000000000000..c2e9d25b892aaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_italian_cased_en_5.1.1_3.0_1694553994811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_italian_cased_en_5.1.1_3.0_1694553994811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_japanese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_japanese_cased_en.md new file mode 100644 index 00000000000000..0f0c199d37599b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_japanese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_japanese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_japanese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_japanese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_japanese_cased_en_5.1.1_3.0_1694554115375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_japanese_cased_en_5.1.1_3.0_1694554115375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_japanese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_japanese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_japanese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|416.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ja-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_lithuanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_lithuanian_cased_en.md new file mode 100644 index 00000000000000..6d3dc265adafc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_lithuanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_lithuanian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_lithuanian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_lithuanian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_lithuanian_cased_en_5.1.1_3.0_1694554240405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_lithuanian_cased_en_5.1.1_3.0_1694554240405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_lithuanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_lithuanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_lithuanian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-lt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_norwegian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_norwegian_cased_en.md new file mode 100644 index 00000000000000..73cb0747dbf2bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_norwegian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_norwegian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_norwegian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_norwegian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_norwegian_cased_en_5.1.1_3.0_1694554556744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_norwegian_cased_en_5.1.1_3.0_1694554556744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_norwegian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_norwegian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_norwegian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|415.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-no-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_polish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_polish_cased_en.md new file mode 100644 index 00000000000000..372a14b4d7dd63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_polish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_polish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_polish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_polish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_polish_cased_en_5.1.1_3.0_1694554731414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_polish_cased_en_5.1.1_3.0_1694554731414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_polish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_polish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_polish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.9 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-pl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_portuguese_cased_en.md new file mode 100644 index 00000000000000..b378b9fc4d462f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_portuguese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_portuguese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_portuguese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_portuguese_cased_en_5.1.1_3.0_1694554858787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_portuguese_cased_en_5.1.1_3.0_1694554858787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|419.2 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-pt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_romanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_romanian_cased_en.md new file mode 100644 index 00000000000000..cc7304f533b9b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_romanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_romanian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_romanian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_romanian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_romanian_cased_en_5.1.1_3.0_1694555022603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_romanian_cased_en_5.1.1_3.0_1694555022603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_romanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_romanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_romanian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ro-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_russian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_russian_cased_en.md new file mode 100644 index 00000000000000..d67d3c6d1b9729 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_russian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_russian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_russian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_russian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_russian_cased_en_5.1.1_3.0_1694555197411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_russian_cased_en_5.1.1_3.0_1694555197411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_russian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_russian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_russian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|428.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ru-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_cased_en.md new file mode 100644 index 00000000000000..1053c12c4e75ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_cased_en_5.1.1_3.0_1694551237571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_cased_en_5.1.1_3.0_1694551237571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|422.2 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_chinese_cased_en.md new file mode 100644 index 00000000000000..a45780d7531714 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_chinese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_chinese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_chinese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_chinese_cased_en_5.1.1_3.0_1694551698031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_chinese_cased_en_5.1.1_3.0_1694551698031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_chinese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_italian_cased_en.md new file mode 100644 index 00000000000000..b6b4c68f82395d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_italian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_italian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_italian_cased_en_5.1.1_3.0_1694551412882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_italian_cased_en_5.1.1_3.0_1694551412882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|431.6 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_portuguese_cased_en.md new file mode 100644 index 00000000000000..bd6389c9cac801 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_spanish_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_spanish_portuguese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_spanish_portuguese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_spanish_portuguese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_portuguese_cased_en_5.1.1_3.0_1694551563082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_spanish_portuguese_cased_en_5.1.1_3.0_1694551563082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_spanish_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_spanish_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_spanish_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|428.0 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-es-pt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_swahili_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_swahili_cased_en.md new file mode 100644 index 00000000000000..e87b3d8cf83294 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_swahili_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_swahili_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_swahili_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_swahili_cased` is an English model originally trained by Geotrend. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_swahili_cased_en_5.1.1_3.0_1694555358059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_swahili_cased_en_5.1.1_3.0_1694555358059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_swahili_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_swahili_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_swahili_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-sw-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_thai_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_thai_cased_en.md new file mode 100644 index 00000000000000..f6aa44acd672a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_thai_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_thai_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_thai_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_thai_cased` is an English model originally trained by Geotrend. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_thai_cased_en_5.1.1_3.0_1694555543000.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_thai_cased_en_5.1.1_3.0_1694555543000.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_thai_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_thai_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_thai_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|404.3 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-th-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_turkish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_turkish_cased_en.md new file mode 100644 index 00000000000000..2da371c0b7b62d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_turkish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_turkish_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_turkish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_turkish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_turkish_cased_en_5.1.1_3.0_1694555686626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_turkish_cased_en_5.1.1_3.0_1694555686626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_turkish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_turkish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_turkish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-tr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_ukrainian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_ukrainian_cased_en.md new file mode 100644 index 00000000000000..95024470e16b7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_ukrainian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_ukrainian_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_ukrainian_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_ukrainian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_ukrainian_cased_en_5.1.1_3.0_1694555899898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_ukrainian_cased_en_5.1.1_3.0_1694555899898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_ukrainian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_ukrainian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_ukrainian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|422.5 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-uk-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_urdu_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_urdu_cased_en.md new file mode 100644 index 00000000000000..a7f82d7e7ca9a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_urdu_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_urdu_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_urdu_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_urdu_cased` is an English model originally trained by Geotrend. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_urdu_cased_en_5.1.1_3.0_1694556060152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_urdu_cased_en_5.1.1_3.0_1694556060152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_urdu_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_urdu_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_urdu_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.7 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-ur-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_vietnamese_cased_en.md new file mode 100644 index 00000000000000..d28523b7e7bd4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_english_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_english_vietnamese_cased BertEmbeddings from Geotrend +author: John Snow Labs +name: bert_base_english_vietnamese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_english_vietnamese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_english_vietnamese_cased_en_5.1.1_3.0_1694556202106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_english_vietnamese_cased_en_5.1.1_3.0_1694556202106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_english_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_english_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_english_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.8 MB| + +## References + +https://huggingface.co/Geotrend/bert-base-en-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_galician_cased_gl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_galician_cased_gl.md new file mode 100644 index 00000000000000..3683260ef29e77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_galician_cased_gl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Galician bert_base_galician_cased BertEmbeddings from marcosgg +author: John Snow Labs +name: bert_base_galician_cased +date: 2023-09-12 +tags: [bert, gl, open_source, fill_mask, onnx] +task: Embeddings +language: gl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_galician_cased` is a Galician model originally trained by marcosgg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_galician_cased_gl_5.1.1_3.0_1694551516731.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_galician_cased_gl_5.1.1_3.0_1694551516731.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_galician_cased","gl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_galician_cased", "gl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_galician_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gl| +|Size:|664.5 MB| + +## References + +https://huggingface.co/marcosgg/bert-base-gl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_german_dbmdz_uncased_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_german_dbmdz_uncased_german_en.md new file mode 100644 index 00000000000000..3e20cf82c3cde5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_german_dbmdz_uncased_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_german_dbmdz_uncased_german BertEmbeddings from koala +author: John Snow Labs +name: bert_base_german_dbmdz_uncased_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_german_dbmdz_uncased_german` is an English model originally trained by koala. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_uncased_german_en_5.1.1_3.0_1694508300297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_dbmdz_uncased_german_en_5.1.1_3.0_1694508300297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_german_dbmdz_uncased_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_dbmdz_uncased_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_dbmdz_uncased_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/koala/bert-base-german-dbmdz-uncased-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_greek_uncased_v1_el.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_greek_uncased_v1_el.md new file mode 100644 index 00000000000000..2a200fdac65287 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_greek_uncased_v1_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) bert_base_greek_uncased_v1 BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_greek_uncased_v1 +date: 2023-09-12 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_greek_uncased_v1` is a Modern Greek (1453-) model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_el_5.1.1_3.0_1694561091912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_el_5.1.1_3.0_1694561091912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v1","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v1", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|421.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-greek-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_ko.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_ko.md new file mode 100644 index 00000000000000..97429141244fd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean bert_base BertEmbeddings from klue +author: John Snow Labs +name: bert_base +date: 2023-09-12 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base` is a Korean model originally trained by klue. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_ko_5.1.1_3.0_1694508175577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_ko_5.1.1_3.0_1694508175577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|412.4 MB| + +## References + +https://huggingface.co/klue/bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_en.md new file mode 100644 index 00000000000000..79a8300797a022 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased BertEmbeddings from HooshvareLab +author: John Snow Labs +name: bert_base_parsbert_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased` is an English model originally trained by HooshvareLab. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_en_5.1.1_3.0_1694559679646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_en_5.1.1_3.0_1694559679646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.4 MB| + +## References + +https://huggingface.co/HooshvareLab/bert-base-parsbert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en.md new file mode 100644 index 00000000000000..e117fc540fd68e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased_finetuned_conditioned_khorshid BertEmbeddings from Hamid-reza +author: John Snow Labs +name: bert_base_parsbert_uncased_finetuned_conditioned_khorshid +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased_finetuned_conditioned_khorshid` is an English model originally trained by Hamid-reza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en_5.1.1_3.0_1694552930772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_conditioned_khorshid_en_5.1.1_3.0_1694552930772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased_finetuned_conditioned_khorshid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased_finetuned_conditioned_khorshid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased_finetuned_conditioned_khorshid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.3 MB| + +## References + +https://huggingface.co/Hamid-reza/bert-base-parsbert-uncased-finetuned-conditioned-khorshid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en.md new file mode 100644 index 00000000000000..854f5c741285c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased_finetuned_khorshid_accelerate BertEmbeddings from Hamid-reza +author: John Snow Labs +name: bert_base_parsbert_uncased_finetuned_khorshid_accelerate +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased_finetuned_khorshid_accelerate` is an English model originally trained by Hamid-reza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en_5.1.1_3.0_1694551419310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_accelerate_en_5.1.1_3.0_1694551419310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased_finetuned_khorshid_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased_finetuned_khorshid_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased_finetuned_khorshid_accelerate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.3 MB| + +## References + +https://huggingface.co/Hamid-reza/bert-base-parsbert-uncased-finetuned-khorshid-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_en.md new file mode 100644 index 00000000000000..dd74fe05f010f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_parsbert_uncased_finetuned_khorshid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_parsbert_uncased_finetuned_khorshid BertEmbeddings from Hamid-reza +author: John Snow Labs +name: bert_base_parsbert_uncased_finetuned_khorshid +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_parsbert_uncased_finetuned_khorshid` is an English model originally trained by Hamid-reza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_en_5.1.1_3.0_1694551198123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_parsbert_uncased_finetuned_khorshid_en_5.1.1_3.0_1694551198123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_parsbert_uncased_finetuned_khorshid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_parsbert_uncased_finetuned_khorshid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_parsbert_uncased_finetuned_khorshid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.4 MB| + +## References + +https://huggingface.co/Hamid-reza/bert-base-parsbert-uncased-finetuned-khorshid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_finetuned_acordao_v2_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_finetuned_acordao_v2_en.md new file mode 100644 index 00000000000000..44b9287425c3f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_finetuned_acordao_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_portuguese_cased_finetuned_acordao_v2 BertEmbeddings from ederkamphorst +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_acordao_v2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_portuguese_cased_finetuned_acordao_v2` is an English model originally trained by ederkamphorst.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_acordao_v2_en_5.1.1_3.0_1694508805037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_acordao_v2_en_5.1.1_3.0_1694508805037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_acordao_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_acordao_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_acordao_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ederkamphorst/bert-base-portuguese-cased-finetuned-acordao_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_neuralmind_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_neuralmind_pt.md new file mode 100644 index 00000000000000..edd95378a27a10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_portuguese_cased_neuralmind_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_portuguese_cased_neuralmind BertEmbeddings from neuralmind +author: John Snow Labs +name: bert_base_portuguese_cased_neuralmind +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_portuguese_cased_neuralmind` is a Portuguese model originally trained by neuralmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_neuralmind_pt_5.1.1_3.0_1694557881484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_neuralmind_pt_5.1.1_3.0_1694557881484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_neuralmind","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_neuralmind", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_neuralmind| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/neuralmind/bert-base-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_swedish_cased_kb_sv.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_swedish_cased_kb_sv.md new file mode 100644 index 00000000000000..23250a730a4ea3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_swedish_cased_kb_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish bert_base_swedish_cased_kb BertEmbeddings from KB +author: John Snow Labs +name: bert_base_swedish_cased_kb +date: 2023-09-12 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_swedish_cased_kb` is a Swedish model originally trained by KB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kb_sv_5.1.1_3.0_1694510194246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kb_sv_5.1.1_3.0_1694510194246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_kb","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_cased_kb", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_kb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|465.2 MB| + +## References + +https://huggingface.co/KB/bert-base-swedish-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_tapt_govreport_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_tapt_govreport_en.md new file mode 100644 index 00000000000000..a31f17cef5f6b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_tapt_govreport_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_tapt_govreport BertEmbeddings from eliolio +author: John Snow Labs +name: bert_base_tapt_govreport +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_tapt_govreport` is an English model originally trained by eliolio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_tapt_govreport_en_5.1.1_3.0_1694562890971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_tapt_govreport_en_5.1.1_3.0_1694562890971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_tapt_govreport","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_tapt_govreport", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_tapt_govreport| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/eliolio/bert-base-tapt-govreport \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_echr_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_echr_en.md new file mode 100644 index 00000000000000..fc5fa653d2dea5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_echr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_echr BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_uncased_echr +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_echr` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_echr_en_5.1.1_3.0_1694561249416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_echr_en_5.1.1_3.0_1694561249416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_echr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_echr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_echr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-echr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_englishlawai_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_englishlawai_en.md new file mode 100644 index 00000000000000..02d277b3a73897 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_englishlawai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_englishlawai BertEmbeddings from Makabaka +author: John Snow Labs +name: bert_base_uncased_englishlawai +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_englishlawai` is an English model originally trained by Makabaka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_englishlawai_en_5.1.1_3.0_1694550163867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_englishlawai_en_5.1.1_3.0_1694550163867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_englishlawai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_englishlawai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_englishlawai| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Makabaka/bert-base-uncased-EnglishLawAI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_eurlex_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_eurlex_en.md new file mode 100644 index 00000000000000..9460a87cc03e1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_eurlex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_eurlex BertEmbeddings from nlpaueb +author: John Snow Labs +name: bert_base_uncased_eurlex +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_eurlex` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_eurlex_en_5.1.1_3.0_1694561402834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_eurlex_en_5.1.1_3.0_1694561402834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_eurlex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_eurlex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_eurlex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nlpaueb/bert-base-uncased-eurlex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_academic_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_academic_en.md new file mode 100644 index 00000000000000..00b7d46ae5899b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_academic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_academic BertEmbeddings from egumasa +author: John Snow Labs +name: bert_base_uncased_finetuned_academic +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_academic` is an English model originally trained by egumasa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_academic_en_5.1.1_3.0_1694555134262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_academic_en_5.1.1_3.0_1694555134262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_academic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_academic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_academic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/egumasa/bert-base-uncased-finetuned-academic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_crypto_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_crypto_en.md new file mode 100644 index 00000000000000..5d06344969a462 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_finetuned_crypto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_crypto BertEmbeddings from smarquie +author: John Snow Labs +name: bert_base_uncased_finetuned_crypto +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_crypto` is an English model originally trained by smarquie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_crypto_en_5.1.1_3.0_1694562492423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_crypto_en_5.1.1_3.0_1694562492423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_crypto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_crypto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_crypto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/smarquie/bert-base-uncased-finetuned-crypto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_gujarati_128_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_gujarati_128_en.md new file mode 100644 index 00000000000000..c74ee99a926635 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_gujarati_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_gujarati_128 BertEmbeddings from mischi001 +author: John Snow Labs +name: bert_base_uncased_gujarati_128 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_gujarati_128` is an English model originally trained by mischi001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_gujarati_128_en_5.1.1_3.0_1694510645737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_gujarati_128_en_5.1.1_3.0_1694510645737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_gujarati_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_gujarati_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_gujarati_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mischi001/bert-base-uncased-gu-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_dongyeop_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_dongyeop_en.md new file mode 100644 index 00000000000000..7ed36aba92f77e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_dongyeop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_dongyeop BertEmbeddings from Dongyeop +author: John Snow Labs +name: bert_base_uncased_issues_128_dongyeop +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_dongyeop` is an English model originally trained by Dongyeop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_dongyeop_en_5.1.1_3.0_1694510297896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_dongyeop_en_5.1.1_3.0_1694510297896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_dongyeop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_dongyeop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_dongyeop| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Dongyeop/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_hrayrmsint_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_hrayrmsint_en.md new file mode 100644 index 00000000000000..27bf3aaa32a0d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_hrayrmsint_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_hrayrmsint BertEmbeddings from HrayrMSint +author: John Snow Labs +name: bert_base_uncased_issues_128_hrayrmsint +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_hrayrmsint` is an English model originally trained by HrayrMSint. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hrayrmsint_en_5.1.1_3.0_1694549539463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hrayrmsint_en_5.1.1_3.0_1694549539463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_hrayrmsint","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_hrayrmsint", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_hrayrmsint| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/HrayrMSint/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_isaacp_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_isaacp_en.md new file mode 100644 index 00000000000000..e1cc7c79e1f8da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_isaacp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_isaacp BertEmbeddings from Isaacp +author: John Snow Labs +name: bert_base_uncased_issues_128_isaacp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_isaacp` is an English model originally trained by Isaacp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_isaacp_en_5.1.1_3.0_1694508451060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_isaacp_en_5.1.1_3.0_1694508451060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_isaacp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_isaacp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_isaacp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Isaacp/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_issues_128_lvwerra_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_issues_128_lvwerra_en.md new file mode 100644 index 00000000000000..38347519819ad8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_issues_128_lvwerra_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_issues_128_lvwerra BertEmbeddings from lvwerra +author: John Snow Labs +name: bert_base_uncased_issues_128_issues_128_lvwerra +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_issues_128_lvwerra` is an English model originally trained by lvwerra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_lvwerra_en_5.1.1_3.0_1694550270689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_lvwerra_en_5.1.1_3.0_1694550270689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_issues_128_lvwerra","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_issues_128_lvwerra", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_issues_128_lvwerra| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/lvwerra/bert-base-uncased-issues-128-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jangmin_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jangmin_en.md new file mode 100644 index 00000000000000..f1ad86721b3e5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jangmin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_jangmin BertEmbeddings from jangmin +author: John Snow Labs +name: bert_base_uncased_issues_128_jangmin +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_jangmin` is an English model originally trained by jangmin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jangmin_en_5.1.1_3.0_1694554761560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jangmin_en_5.1.1_3.0_1694554761560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_jangmin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_jangmin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_jangmin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jangmin/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_junghun_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_junghun_en.md new file mode 100644 index 00000000000000..e259aa105c6eb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_junghun_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_junghun BertEmbeddings from JungHun +author: John Snow Labs +name: bert_base_uncased_issues_128_junghun +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_junghun` is an English model originally trained by JungHun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_junghun_en_5.1.1_3.0_1694556714538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_junghun_en_5.1.1_3.0_1694556714538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_junghun","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_junghun", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_junghun| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/JungHun/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jx7789_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jx7789_en.md new file mode 100644 index 00000000000000..315c9204483781 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_jx7789_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_jx7789 BertEmbeddings from jx7789 +author: John Snow Labs +name: bert_base_uncased_issues_128_jx7789 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_jx7789` is an English model originally trained by jx7789. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jx7789_en_5.1.1_3.0_1694555592115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jx7789_en_5.1.1_3.0_1694555592115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_jx7789","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_jx7789", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_jx7789| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jx7789/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_kjunelee_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_kjunelee_en.md new file mode 100644 index 00000000000000..970d677c362d5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_kjunelee_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_kjunelee BertEmbeddings from kjunelee +author: John Snow Labs +name: bert_base_uncased_issues_128_kjunelee +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_kjunelee` is an English model originally trained by kjunelee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_kjunelee_en_5.1.1_3.0_1694552426494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_kjunelee_en_5.1.1_3.0_1694552426494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_kjunelee","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_kjunelee", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_kjunelee| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/kjunelee/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_transll_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_transll_en.md new file mode 100644 index 00000000000000..48de853cec6289 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_issues_128_transll_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_transll BertEmbeddings from TransLL +author: John Snow Labs +name: bert_base_uncased_issues_128_transll +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_transll` is an English model originally trained by TransLL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_transll_en_5.1.1_3.0_1694550129029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_transll_en_5.1.1_3.0_1694550129029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_transll","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_transll", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_transll| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/TransLL/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en.md new file mode 100644 index 00000000000000..abe6f9dcf72fe4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier` is an English model originally trained by Intel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en_5.1.1_3.0_1694559998864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier_en_5.1.1_3.0_1694559998864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mnli_sparse_70_unstructured_norwegian_classifier| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|225.9 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-mnli-sparse-70-unstructured-no-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mwesner_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mwesner_en.md new file mode 100644 index 00000000000000..83f70642955b72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_mwesner_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_mwesner BertEmbeddings from mwesner +author: John Snow Labs +name: bert_base_uncased_mwesner +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mwesner` is an English model originally trained by mwesner. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mwesner_en_5.1.1_3.0_1694556928581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mwesner_en_5.1.1_3.0_1694556928581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_mwesner","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_mwesner", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mwesner| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mwesner/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_rahuldave_issues_128_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_rahuldave_issues_128_en.md new file mode 100644 index 00000000000000..9357261bb64a02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_rahuldave_issues_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_rahuldave_issues_128 BertEmbeddings from rahuldave +author: John Snow Labs +name: bert_base_uncased_rahuldave_issues_128 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_rahuldave_issues_128` is an English model originally trained by rahuldave. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rahuldave_issues_128_en_5.1.1_3.0_1694553801165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rahuldave_issues_128_en_5.1.1_3.0_1694553801165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_rahuldave_issues_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_rahuldave_issues_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_rahuldave_issues_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/rahuldave/bert-base-uncased-rahuldave-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_70_unstructured_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_70_unstructured_en.md new file mode 100644 index 00000000000000..f8578c727b7524 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_70_unstructured_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_70_unstructured BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_70_unstructured +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sparse_70_unstructured` is an English model originally trained by Intel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_70_unstructured_en_5.1.1_3.0_1694560206490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_70_unstructured_en_5.1.1_3.0_1694560206490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_70_unstructured","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_70_unstructured", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_70_unstructured| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|225.8 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-70-unstructured \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_85_unstructured_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_85_unstructured_pruneofa_en.md new file mode 100644 index 00000000000000..6f7d439896013e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_85_unstructured_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_85_unstructured_pruneofa BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_85_unstructured_pruneofa +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sparse_85_unstructured_pruneofa` is an English model originally trained by Intel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_85_unstructured_pruneofa_en_5.1.1_3.0_1694560356013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_85_unstructured_pruneofa_en_5.1.1_3.0_1694560356013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_85_unstructured_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_85_unstructured_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_85_unstructured_pruneofa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|175.6 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-85-unstructured-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_90_unstructured_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_90_unstructured_pruneofa_en.md new file mode 100644 index 00000000000000..4a7bed5ce732b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_sparse_90_unstructured_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_90_unstructured_pruneofa BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_90_unstructured_pruneofa +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sparse_90_unstructured_pruneofa` is an English model originally trained by Intel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_90_unstructured_pruneofa_en_5.1.1_3.0_1694560534251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_90_unstructured_pruneofa_en_5.1.1_3.0_1694560534251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_90_unstructured_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_90_unstructured_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_90_unstructured_pruneofa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|157.7 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-90-unstructured-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_transformers_github_128_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_transformers_github_128_en.md new file mode 100644 index 00000000000000..7d61bfb913f5c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_uncased_transformers_github_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_transformers_github_128 BertEmbeddings from GV05 +author: John Snow Labs +name: bert_base_uncased_transformers_github_128 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_transformers_github_128` is an English model originally trained by GV05. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_transformers_github_128_en_5.1.1_3.0_1694554427020.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_transformers_github_128_en_5.1.1_3.0_1694554427020.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_transformers_github_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_transformers_github_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_transformers_github_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/GV05/bert-base-uncased-transformers-github-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_vietnamese_uncased_vi.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_vietnamese_uncased_vi.md new file mode 100644 index 00000000000000..a399d814a7b57f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_vietnamese_uncased_vi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Vietnamese bert_base_vietnamese_uncased BertEmbeddings from tintnguyen +author: John Snow Labs +name: bert_base_vietnamese_uncased +date: 2023-09-12 +tags: [bert, vi, open_source, fill_mask, onnx] +task: Embeddings +language: vi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_vietnamese_uncased` is a Vietnamese model originally trained by tintnguyen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_vietnamese_uncased_vi_5.1.1_3.0_1694551029030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_vietnamese_uncased_vi_5.1.1_3.0_1694551029030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_vietnamese_uncased","vi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_vietnamese_uncased", "vi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_vietnamese_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|vi| +|Size:|536.8 MB| + +## References + +https://huggingface.co/tintnguyen/bert-base-vi-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_wikihow_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_wikihow_en.md new file mode 100644 index 00000000000000..fe2333ebca4769 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_wikihow_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_wikihow BertEmbeddings from Aktsvigun +author: John Snow Labs +name: bert_base_wikihow +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_wikihow` is an English model originally trained by Aktsvigun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_wikihow_en_5.1.1_3.0_1694553949077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_wikihow_en_5.1.1_3.0_1694553949077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_wikihow","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_wikihow", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_wikihow| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Aktsvigun/bert-base-wikihow \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_base_xsum_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_base_xsum_en.md new file mode 100644 index 00000000000000..0d79b071cbcc10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_base_xsum_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_xsum BertEmbeddings from Aktsvigun +author: John Snow Labs +name: bert_base_xsum +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_xsum` is an English model originally trained by Aktsvigun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_xsum_en_5.1.1_3.0_1694552050336.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_xsum_en_5.1.1_3.0_1694552050336.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_xsum","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_xsum", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_xsum| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Aktsvigun/bert-base-xsum \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_german_en.md new file mode 100644 index 00000000000000..d96fe467741491 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c1_english_german BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c1_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_c1_english_german` is an English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c1_english_german_en_5.1.1_3.0_1694558764674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c1_english_german_en_5.1.1_3.0_1694558764674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c1_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c1_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c1_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c1-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_only_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_only_en.md new file mode 100644 index 00000000000000..efbc94a0ffb839 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c1_english_only_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c1_english_only BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c1_english_only +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_c1_english_only` is an English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c1_english_only_en_5.1.1_3.0_1694558574758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c1_english_only_en_5.1.1_3.0_1694558574758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c1_english_only","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c1_english_only", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c1_english_only| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c1-en-only \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_german_en.md new file mode 100644 index 00000000000000..f9bb973d3548b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c2_english_german BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c2_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_c2_english_german` is an English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c2_english_german_en_5.1.1_3.0_1694558981307.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c2_english_german_en_5.1.1_3.0_1694558981307.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c2_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c2_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c2_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c2-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_only_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_only_en.md new file mode 100644 index 00000000000000..4ae205f09173fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_c2_english_only_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_c2_english_only BertEmbeddings from OpenSemShift +author: John Snow Labs +name: bert_c2_english_only +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_c2_english_only` is a English model originally trained by OpenSemShift. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_c2_english_only_en_5.1.1_3.0_1694560907370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_c2_english_only_en_5.1.1_3.0_1694560907370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_c2_english_only","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_c2_english_only", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_c2_english_only| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/OpenSemShift/bert-c2-en-only \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_clinical_scratch_wl_spanish_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_clinical_scratch_wl_spanish_en.md new file mode 100644 index 00000000000000..bdee3e9038fe12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_clinical_scratch_wl_spanish_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_clinical_scratch_wl_spanish BertEmbeddings from plncmm +author: John Snow Labs +name: bert_clinical_scratch_wl_spanish +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_clinical_scratch_wl_spanish` is a English model originally trained by plncmm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_clinical_scratch_wl_spanish_en_5.1.1_3.0_1694549252966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_clinical_scratch_wl_spanish_en_5.1.1_3.0_1694549252966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_clinical_scratch_wl_spanish","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_clinical_scratch_wl_spanish", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_clinical_scratch_wl_spanish| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/plncmm/bert-clinical-scratch-wl-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_e_base_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_e_base_mlm_en.md new file mode 100644 index 00000000000000..7516ad99fa0fdc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_e_base_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_e_base_mlm BertEmbeddings from nasa-impact +author: John Snow Labs +name: bert_e_base_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_e_base_mlm` is a English model originally trained by nasa-impact. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_e_base_mlm_en_5.1.1_3.0_1694557364630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_e_base_mlm_en_5.1.1_3.0_1694557364630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_e_base_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_e_base_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_e_base_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/nasa-impact/bert-e-base-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_emoji_latvian_twitter_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_emoji_latvian_twitter_en.md new file mode 100644 index 00000000000000..ead21b98623a7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_emoji_latvian_twitter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_emoji_latvian_twitter BertEmbeddings from FFZG-cleopatra +author: John Snow Labs +name: bert_emoji_latvian_twitter +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_emoji_latvian_twitter` is a English model originally trained by FFZG-cleopatra. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_emoji_latvian_twitter_en_5.1.1_3.0_1694548098064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_emoji_latvian_twitter_en_5.1.1_3.0_1694548098064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_emoji_latvian_twitter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_emoji_latvian_twitter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_emoji_latvian_twitter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.2 MB| + +## References + +https://huggingface.co/FFZG-cleopatra/bert-emoji-latvian-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_finetuning_test_lian01110_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_finetuning_test_lian01110_en.md new file mode 100644 index 00000000000000..a6be7241b26233 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_finetuning_test_lian01110_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_lian01110 BertEmbeddings from lian01110 +author: John Snow Labs +name: bert_finetuning_test_lian01110 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_finetuning_test_lian01110` is a English model originally trained by lian01110. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_lian01110_en_5.1.1_3.0_1694548658763.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_lian01110_en_5.1.1_3.0_1694548658763.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test_lian01110","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_lian01110", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_lian01110| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lian01110/bert_finetuning_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_for_finacial_triples_completion_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_for_finacial_triples_completion_en.md new file mode 100644 index 00000000000000..10a56529d1883d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_for_finacial_triples_completion_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_for_finacial_triples_completion BertEmbeddings from reginaboateng +author: John Snow Labs +name: bert_for_finacial_triples_completion +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_for_finacial_triples_completion` is a English model originally trained by reginaboateng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_for_finacial_triples_completion_en_5.1.1_3.0_1694549182533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_for_finacial_triples_completion_en_5.1.1_3.0_1694549182533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_for_finacial_triples_completion","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_for_finacial_triples_completion", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_for_finacial_triples_completion| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/reginaboateng/bert_for_finacial_triples_completion \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_funting_test_ai10_niepan_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_funting_test_ai10_niepan_en.md new file mode 100644 index 00000000000000..2ac0ed5a6a7c05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_funting_test_ai10_niepan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_funting_test_ai10_niepan BertEmbeddings from niepan +author: John Snow Labs +name: bert_funting_test_ai10_niepan +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_funting_test_ai10_niepan` is a English model originally trained by niepan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_funting_test_ai10_niepan_en_5.1.1_3.0_1694558288937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_funting_test_ai10_niepan_en_5.1.1_3.0_1694558288937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_funting_test_ai10_niepan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_funting_test_ai10_niepan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_funting_test_ai10_niepan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/niepan/bert_funting_test_ai10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_galician_gl.md b/docs/_posts/ahmedlone127/2023-09-12-bert_galician_gl.md new file mode 100644 index 00000000000000..c45ebe51884458 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_galician_gl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Galician bert_galician BertEmbeddings from fpuentes +author: John Snow Labs +name: bert_galician +date: 2023-09-12 +tags: [bert, gl, open_source, fill_mask, onnx] +task: Embeddings +language: gl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_galician` is a Galician model originally trained by fpuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_galician_gl_5.1.1_3.0_1694550784049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_galician_gl_5.1.1_3.0_1694550784049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_galician","gl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_galician", "gl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_galician| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gl| +|Size:|409.0 MB| + +## References + +https://huggingface.co/fpuentes/bert-galician \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_hateful_memes_expanded_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_hateful_memes_expanded_en.md new file mode 100644 index 00000000000000..59b1251b4d0c10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_hateful_memes_expanded_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hateful_memes_expanded BertEmbeddings from limjiayi +author: John Snow Labs +name: bert_hateful_memes_expanded +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_hateful_memes_expanded` is a English model originally trained by limjiayi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hateful_memes_expanded_en_5.1.1_3.0_1694548862078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hateful_memes_expanded_en_5.1.1_3.0_1694548862078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_hateful_memes_expanded","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hateful_memes_expanded", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hateful_memes_expanded| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/limjiayi/bert-hateful-memes-expanded \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_java_bfp_combined_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_java_bfp_combined_en.md new file mode 100644 index 00000000000000..ef3ec9b7562b4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_java_bfp_combined_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_java_bfp_combined BertEmbeddings from up201806461 +author: John Snow Labs +name: bert_java_bfp_combined +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_java_bfp_combined` is a English model originally trained by up201806461. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_java_bfp_combined_en_5.1.1_3.0_1694562355805.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_java_bfp_combined_en_5.1.1_3.0_1694562355805.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_java_bfp_combined","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_java_bfp_combined", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_java_bfp_combined| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/up201806461/bert-java-bfp_combined \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_en.md new file mode 100644 index 00000000000000..09a77ee8a25bf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_base_uncased BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_khmer_base_uncased` is a English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_en_5.1.1_3.0_1694548567530.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_en_5.1.1_3.0_1694548567530.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_khmer_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.0 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_tokenized_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_tokenized_en.md new file mode 100644 index 00000000000000..065f90f832fbd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_base_uncased_tokenized_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_base_uncased_tokenized BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_base_uncased_tokenized +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_khmer_base_uncased_tokenized` is a English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_tokenized_en_5.1.1_3.0_1694548424527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_base_uncased_tokenized_en_5.1.1_3.0_1694548424527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_khmer_base_uncased_tokenized","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_base_uncased_tokenized", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_base_uncased_tokenized| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.9 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-base-uncased-tokenized \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_en.md new file mode 100644 index 00000000000000..5c0910e0b64983 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_small_uncased BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_small_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_khmer_small_uncased` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_en_5.1.1_3.0_1694548748669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_en_5.1.1_3.0_1694548748669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_khmer_small_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_small_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_small_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|110.2 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-small-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_tokenized_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_tokenized_en.md new file mode 100644 index 00000000000000..b1106d77e59c95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_khmer_small_uncased_tokenized_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_khmer_small_uncased_tokenized BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_khmer_small_uncased_tokenized +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_khmer_small_uncased_tokenized` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_tokenized_en_5.1.1_3.0_1694548658102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_khmer_small_uncased_tokenized_en_5.1.1_3.0_1694548658102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_khmer_small_uncased_tokenized","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_khmer_small_uncased_tokenized", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_khmer_small_uncased_tokenized| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|110.2 MB| + +## References + +https://huggingface.co/GKLMIP/bert-khmer-small-uncased-tokenized \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_korean_base_ko.md b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_base_ko.md new file mode 100644 index 00000000000000..7bbf1ef5e79a2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_base_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean bert_korean_base BertEmbeddings from lassl +author: John Snow Labs +name: bert_korean_base +date: 2023-09-12 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_korean_base` is a Korean model originally trained by lassl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_korean_base_ko_5.1.1_3.0_1694547622935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_korean_base_ko_5.1.1_3.0_1694547622935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_korean_base","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_korean_base", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_korean_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|467.8 MB| + +## References + +https://huggingface.co/lassl/bert-ko-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_korean_small_ko.md b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_small_ko.md new file mode 100644 index 00000000000000..1f09b7aebc3cb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_korean_small_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean bert_korean_small BertEmbeddings from lassl +author: John Snow Labs +name: bert_korean_small +date: 2023-09-12 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_korean_small` is a Korean model originally trained by lassl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_korean_small_ko_5.1.1_3.0_1694547734588.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_korean_small_ko_5.1.1_3.0_1694547734588.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_korean_small","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_korean_small", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_korean_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|85.6 MB| + +## References + +https://huggingface.co/lassl/bert-ko-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_english_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_english_en.md new file mode 100644 index 00000000000000..5e62b1afb3b957 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_english_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_english BertEmbeddings from koala +author: John Snow Labs +name: bert_large_cased_english +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_english` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_english_en_5.1.1_3.0_1694508535175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_english_en_5.1.1_3.0_1694508535175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_english","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_english", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/koala/bert-large-cased-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_finetuned_prompt_20_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_finetuned_prompt_20_en.md new file mode 100644 index 00000000000000..ba14f69c18358a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_finetuned_prompt_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_prompt_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_prompt_20 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_prompt_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_prompt_20_en_5.1.1_3.0_1694560618058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_prompt_20_en_5.1.1_3.0_1694560618058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_prompt_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_prompt_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_prompt_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-prompt-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_portuguese_law_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_portuguese_law_en.md new file mode 100644 index 00000000000000..6bef6ea8d03272 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_portuguese_law_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_portuguese_law BertEmbeddings from edwatanabe +author: John Snow Labs +name: bert_large_cased_portuguese_law +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_portuguese_law` is an English model originally trained by edwatanabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_law_en_5.1.1_3.0_1694548002360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_law_en_5.1.1_3.0_1694548002360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_portuguese_law","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_portuguese_law", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_portuguese_law| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/edwatanabe/bert-large-cased-pt-law \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sclarge_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sclarge_en.md new file mode 100644 index 00000000000000..825ce080100fc7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sclarge_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sclarge BertEmbeddings from ZongqianLi +author: John Snow Labs +name: bert_large_cased_sclarge +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sclarge` is an English model originally trained by ZongqianLi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sclarge_en_5.1.1_3.0_1694562199422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sclarge_en_5.1.1_3.0_1694562199422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sclarge","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sclarge", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sclarge| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/ZongqianLi/bert_large_cased_sclarge \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en.md new file mode 100644 index 00000000000000..8876631c16104c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en_5.1.1_3.0_1694508360095.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10_en_5.1.1_3.0_1694508360095.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en.md new file mode 100644 index 00000000000000..8136d0ebbe7620 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en_5.1.1_3.0_1694509499566.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11_en_5.1.1_3.0_1694509499566.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_11| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-11 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en.md new file mode 100644 index 00000000000000..1319cc27ddebfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en_5.1.1_3.0_1694510143392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12_en_5.1.1_3.0_1694510143392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads both "documents" and "token", so tokenize before embedding +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings reads both "documents" and "token", so tokenize before embedding +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en.md new file mode 100644 index 00000000000000..638809f31231ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en_5.1.1_3.0_1694510416192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13_en_5.1.1_3.0_1694510416192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_13| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en.md new file mode 100644 index 00000000000000..3ac5584420fff5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en_5.1.1_3.0_1694510686666.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14_en_5.1.1_3.0_1694510686666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_14| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en.md new file mode 100644 index 00000000000000..ccb7015fb49c59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en_5.1.1_3.0_1694547583222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15_en_5.1.1_3.0_1694547583222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_15| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-15 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en.md new file mode 100644 index 00000000000000..569a02cfdcf537 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en_5.1.1_3.0_1694547909959.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16_en_5.1.1_3.0_1694547909959.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_16| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en.md new file mode 100644 index 00000000000000..efe1f33b166e14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en_5.1.1_3.0_1694548216024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17_en_5.1.1_3.0_1694548216024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_17| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-17 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en.md new file mode 100644 index 00000000000000..41a50555598393 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en_5.1.1_3.0_1694548498129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18_en_5.1.1_3.0_1694548498129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_18| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-18 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en.md new file mode 100644 index 00000000000000..d66544b26af1ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en_5.1.1_3.0_1694548783818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19_en_5.1.1_3.0_1694548783818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en.md new file mode 100644 index 00000000000000..267efcba9435ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9` is a English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en_5.1.1_3.0_1694508105221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9_en_5.1.1_3.0_1694508105221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_portuguese_cased_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_portuguese_cased_pt.md new file mode 100644 index 00000000000000..3b5fd7ce34bba3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_portuguese_cased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_large_portuguese_cased BertEmbeddings from neuralmind +author: John Snow Labs +name: bert_large_portuguese_cased +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_portuguese_cased` is a Portuguese model originally trained by neuralmind. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_portuguese_cased_pt_5.1.1_3.0_1694558116929.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_portuguese_cased_pt_5.1.1_3.0_1694558116929.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_portuguese_cased","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_portuguese_cased", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/neuralmind/bert-large-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_bengali_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_bengali_en.md new file mode 100644 index 00000000000000..b839e31e6a0881 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_bengali_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_bengali BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_bengali +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_bengali` is a English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_bengali_en_5.1.1_3.0_1694508782600.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_bengali_en_5.1.1_3.0_1694508782600.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_bengali","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_bengali", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_bengali| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-bn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_chinese_en.md new file mode 100644 index 00000000000000..4635e55be80a82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_chinese BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_chinese` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_chinese_en_5.1.1_3.0_1694510115547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_chinese_en_5.1.1_3.0_1694510115547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_english_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_english_en.md new file mode 100644 index 00000000000000..817bdf437e90dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_english_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_english BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_english +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_english` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_english_en_5.1.1_3.0_1694509318989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_english_en_5.1.1_3.0_1694509318989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_english","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_english", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_clinc150_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_clinc150_en.md new file mode 100644 index 00000000000000..9bd92193e94568 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_clinc150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_clinc150 BertEmbeddings from FilippoComastri +author: John Snow Labs +name: bert_large_uncased_finetuned_clinc150 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_clinc150` is an English model originally trained by FilippoComastri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_clinc150_en_5.1.1_3.0_1694552611395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_clinc150_en_5.1.1_3.0_1694552611395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_clinc150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_clinc150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_clinc150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/FilippoComastri/bert-large-uncased-finetuned-clinc150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_20_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_20_en.md new file mode 100644 index 00000000000000..320e405c61ac4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_da_zero_shot_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_da_zero_shot_20 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_da_zero_shot_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_20_en_5.1.1_3.0_1694560241206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_20_en_5.1.1_3.0_1694560241206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_da_zero_shot_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_da_zero_shot_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_da_zero_shot_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-DA-Zero-shot-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_en.md new file mode 100644 index 00000000000000..d519bada948a6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_da_zero_shot_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_da_zero_shot BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_da_zero_shot +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_da_zero_shot` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_en_5.1.1_3.0_1694559743511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_da_zero_shot_en_5.1.1_3.0_1694559743511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_da_zero_shot","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_da_zero_shot", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_da_zero_shot| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-DA-Zero-shot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..77acfb49b7ec77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_imdb BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_imdb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_imdb` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_imdb_en_5.1.1_3.0_1694559416760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_imdb_en_5.1.1_3.0_1694559416760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_youcook_4_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_youcook_4_en.md new file mode 100644 index 00000000000000..50c15d66752ada --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_finetuned_youcook_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_youcook_4 BertEmbeddings from CennetOguz +author: John Snow Labs +name: bert_large_uncased_finetuned_youcook_4 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_youcook_4` is an English model originally trained by CennetOguz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_youcook_4_en_5.1.1_3.0_1694560592308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_youcook_4_en_5.1.1_3.0_1694560592308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_youcook_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_youcook_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_youcook_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/CennetOguz/bert-large-uncased-finetuned-youcook_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_german_en.md new file mode 100644 index 00000000000000..4df357b4bde208 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_german BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_german` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_german_en_5.1.1_3.0_1694509061010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_german_en_5.1.1_3.0_1694509061010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_hindi_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_hindi_en.md new file mode 100644 index 00000000000000..d47ae7e623ad40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_hindi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_hindi BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_hindi +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_hindi` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_hindi_en_5.1.1_3.0_1694509561973.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_hindi_en_5.1.1_3.0_1694509561973.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_hindi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_hindi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_hindi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-hi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_korean_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_korean_en.md new file mode 100644 index 00000000000000..e450ff959c84e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_large_uncased_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_korean BertEmbeddings from koala +author: John Snow Labs +name: bert_large_uncased_korean +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_korean` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_korean_en_5.1.1_3.0_1694509833455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_korean_en_5.1.1_3.0_1694509833455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/koala/bert-large-uncased-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_ltrc_telugu_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_ltrc_telugu_en.md new file mode 100644 index 00000000000000..009d029dcf02f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_ltrc_telugu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_ltrc_telugu BertEmbeddings from ltrctelugu +author: John Snow Labs +name: bert_ltrc_telugu +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ltrc_telugu` is an English model originally trained by ltrctelugu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ltrc_telugu_en_5.1.1_3.0_1694549907705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ltrc_telugu_en_5.1.1_3.0_1694549907705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ltrc_telugu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ltrc_telugu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ltrc_telugu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/ltrctelugu/bert_ltrc_telugu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_persian_poetry_fa.md b/docs/_posts/ahmedlone127/2023-09-12-bert_persian_poetry_fa.md new file mode 100644 index 00000000000000..9155037bc88afe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_persian_poetry_fa.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Persian bert_persian_poetry BertEmbeddings from mitra-mir +author: John Snow Labs +name: bert_persian_poetry +date: 2023-09-12 +tags: [bert, fa, open_source, fill_mask, onnx] +task: Embeddings +language: fa +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_persian_poetry` is a Persian model originally trained by mitra-mir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_persian_poetry_fa_5.1.1_3.0_1694553964697.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_persian_poetry_fa_5.1.1_3.0_1694553964697.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_persian_poetry","fa") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_persian_poetry", "fa") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_persian_poetry| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fa| +|Size:|441.4 MB| + +## References + +https://huggingface.co/mitra-mir/BERT-Persian-Poetry \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_political_election2020_twitter_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_political_election2020_twitter_mlm_en.md new file mode 100644 index 00000000000000..0a2651b7446a9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_political_election2020_twitter_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_political_election2020_twitter_mlm BertEmbeddings from kornosk +author: John Snow Labs +name: bert_political_election2020_twitter_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_political_election2020_twitter_mlm` is an English model originally trained by kornosk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_political_election2020_twitter_mlm_en_5.1.1_3.0_1694510414367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_political_election2020_twitter_mlm_en_5.1.1_3.0_1694510414367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_political_election2020_twitter_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_political_election2020_twitter_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_political_election2020_twitter_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/kornosk/bert-political-election2020-twitter-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_tagalog_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_tagalog_base_uncased_en.md new file mode 100644 index 00000000000000..f2cdce9badceed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_tagalog_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tagalog_base_uncased BertEmbeddings from GKLMIP +author: John Snow Labs +name: bert_tagalog_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_tagalog_base_uncased` is an English model originally trained by GKLMIP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tagalog_base_uncased_en_5.1.1_3.0_1694549199633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tagalog_base_uncased_en_5.1.1_3.0_1694549199633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_tagalog_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tagalog_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tagalog_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|469.7 MB| + +## References + +https://huggingface.co/GKLMIP/bert-tagalog-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_test_andychiang_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_test_andychiang_en.md new file mode 100644 index 00000000000000..3cb148360a70e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_test_andychiang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_test_andychiang BertEmbeddings from AndyChiang +author: John Snow Labs +name: bert_test_andychiang +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_test_andychiang` is an English model originally trained by AndyChiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_test_andychiang_en_5.1.1_3.0_1694559088930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_test_andychiang_en_5.1.1_3.0_1694559088930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_test_andychiang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_test_andychiang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_test_andychiang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/AndyChiang/bert-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_finetuned_model_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_finetuned_model_en.md new file mode 100644 index 00000000000000..83cc82471ed04c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_finetuned_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_finetuned_model BertEmbeddings from harvinder676 +author: John Snow Labs +name: bert_uncased_finetuned_model +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_uncased_finetuned_model` is an English model originally trained by harvinder676. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_finetuned_model_en_5.1.1_3.0_1694556527374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_finetuned_model_en_5.1.1_3.0_1694556527374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_uncased_finetuned_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_finetuned_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_finetuned_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/harvinder676/bert-uncased-finetuned-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en.md b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en.md new file mode 100644 index 00000000000000..df43c41eaccefa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_12_h_768_a_12_italian_alb3rt0 BertEmbeddings from m-polignano-uniba +author: John Snow Labs +name: bert_uncased_l_12_h_768_a_12_italian_alb3rt0 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_uncased_l_12_h_768_a_12_italian_alb3rt0` is an English model originally trained by m-polignano-uniba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en_5.1.1_3.0_1694550811678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_12_h_768_a_12_italian_alb3rt0_en_5.1.1_3.0_1694550811678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_uncased_l_12_h_768_a_12_italian_alb3rt0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_12_h_768_a_12_italian_alb3rt0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_12_h_768_a_12_italian_alb3rt0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.7 MB| + +## References + +https://huggingface.co/m-polignano-uniba/bert_uncased_L-12_H-768_A-12_italian_alb3rt0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_base_uncased_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_base_uncased_pt.md new file mode 100644 index 00000000000000..0140312ebe2270 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_base_uncased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertabaporu_base_uncased BertEmbeddings from pablocosta +author: John Snow Labs +name: bertabaporu_base_uncased +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertabaporu_base_uncased` is a Portuguese model originally trained by pablocosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertabaporu_base_uncased_pt_5.1.1_3.0_1694556656111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertabaporu_base_uncased_pt_5.1.1_3.0_1694556656111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertabaporu_base_uncased","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertabaporu_base_uncased", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertabaporu_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|504.8 MB| + +## References + +https://huggingface.co/pablocosta/bertabaporu-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_large_uncased_pt.md b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_large_uncased_pt.md new file mode 100644 index 00000000000000..f530999d960781 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertabaporu_large_uncased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertabaporu_large_uncased BertEmbeddings from pablocosta +author: John Snow Labs +name: bertabaporu_large_uncased +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertabaporu_large_uncased` is a Portuguese model originally trained by pablocosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertabaporu_large_uncased_pt_5.1.1_3.0_1694556951243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertabaporu_large_uncased_pt_5.1.1_3.0_1694556951243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertabaporu_large_uncased","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertabaporu_large_uncased", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertabaporu_large_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.4 GB| + +## References + +https://huggingface.co/pablocosta/bertabaporu-large-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_1e_en.md new file mode 100644 index 00000000000000..c433c7b1c1cdf9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbase_ug_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbase_ug_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertbase_ug_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbase_ug_1e_en_5.1.1_3.0_1694559625780.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbase_ug_1e_en_5.1.1_3.0_1694559625780.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbase_ug_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbase_ug_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbase_ug_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBase_UG_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_2e_en.md new file mode 100644 index 00000000000000..7bdf0ba53b5e80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbase_ug_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbase_ug_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertbase_ug_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbase_ug_2e_en_5.1.1_3.0_1694559815371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbase_ug_2e_en_5.1.1_3.0_1694559815371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbase_ug_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbase_ug_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbase_ug_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.6 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBase_UG_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_3e_en.md new file mode 100644 index 00000000000000..064c1de630ce77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbase_ug_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbase_ug_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbase_ug_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertbase_ug_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbase_ug_3e_en_5.1.1_3.0_1694559989794.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbase_ug_3e_en_5.1.1_3.0_1694559989794.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbase_ug_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbase_ug_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbase_ug_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|492.9 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBase_UG_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_1e_en.md new file mode 100644 index 00000000000000..adfb015f86eb3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbasekk_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbasekk_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertbasekk_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbasekk_1e_en_5.1.1_3.0_1694553907199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbasekk_1e_en_5.1.1_3.0_1694553907199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bertbasekk_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbasekk_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbasekk_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBaseKK_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_2e_en.md new file mode 100644 index 00000000000000..cd439282d3971e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbasekk_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbasekk_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertbasekk_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbasekk_2e_en_5.1.1_3.0_1694558294650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbasekk_2e_en_5.1.1_3.0_1694558294650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertbasekk_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbasekk_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbasekk_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.5 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBaseKK_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_3e_en.md new file mode 100644 index 00000000000000..664ccdcda5be16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertbasekk_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertbasekk_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: bertbasekk_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbasekk_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbasekk_3e_en_5.1.1_3.0_1694558506984.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbasekk_3e_en_5.1.1_3.0_1694558506984.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertbasekk_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertbasekk_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbasekk_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|492.9 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/BertBaseKK_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertimbaulaw_base_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertimbaulaw_base_portuguese_cased_en.md new file mode 100644 index 00000000000000..2860b20b5c3843 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertimbaulaw_base_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertimbaulaw_base_portuguese_cased BertEmbeddings from alfaneo +author: John Snow Labs +name: bertimbaulaw_base_portuguese_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertimbaulaw_base_portuguese_cased` is an English model originally trained by alfaneo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbaulaw_base_portuguese_cased_en_5.1.1_3.0_1694557285783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbaulaw_base_portuguese_cased_en_5.1.1_3.0_1694557285783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertimbaulaw_base_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertimbaulaw_base_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbaulaw_base_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.8 MB| + +## References + +https://huggingface.co/alfaneo/bertimbaulaw-base-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdata_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdata_en.md new file mode 100644 index 00000000000000..5f86509ecf8c0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdata_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdata BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdata +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdata` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdata_en_5.1.1_3.0_1694561421127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdata_en_5.1.1_3.0_1694561421127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdata","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdata", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdata| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialData \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall03_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall03_en.md new file mode 100644 index 00000000000000..c3596f32f058aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall03_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataall03 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataall03 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataall03` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataall03_en_5.1.1_3.0_1694561834775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataall03_en_5.1.1_3.0_1694561834775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataall03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataall03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataall03| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALL03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall04_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall04_en.md new file mode 100644 index 00000000000000..82a05b455c516f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall04_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataall04 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataall04 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataall04` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataall04_en_5.1.1_3.0_1694561966824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataall04_en_5.1.1_3.0_1694561966824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataall04","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataall04", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataall04| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALL04 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall_en.md new file mode 100644 index 00000000000000..423677d03ebd6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataall_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataall BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataall +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataall` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataall_en_5.1.1_3.0_1694561562918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataall_en_5.1.1_3.0_1694561562918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataall","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataall", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataall| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly02_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly02_en.md new file mode 100644 index 00000000000000..746f768027b890 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly02_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly02 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly02 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly02` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly02_en_5.1.1_3.0_1694562283503.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly02_en_5.1.1_3.0_1694562283503.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly02","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly02", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly03_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly03_en.md new file mode 100644 index 00000000000000..a6fa8c74e184dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly03_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly03 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly03 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly03` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly03_en_5.1.1_3.0_1694562426971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly03_en_5.1.1_3.0_1694562426971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly03| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly04_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly04_en.md new file mode 100644 index 00000000000000..3b08bfc73ced60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly04_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly04 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly04 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly04` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly04_en_5.1.1_3.0_1694562548301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly04_en_5.1.1_3.0_1694562548301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly04","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly04", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly04| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly04 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly05_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly05_en.md new file mode 100644 index 00000000000000..189dd7a9d07187 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly05_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly05 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly05 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly05` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly05_en_5.1.1_3.0_1694562715391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly05_en_5.1.1_3.0_1694562715391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly05","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly05", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly05| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly05 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly06_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly06_en.md new file mode 100644 index 00000000000000..b4e3b5cb261d20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly06_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly06 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly06 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly06` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly06_en_5.1.1_3.0_1694562884246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly06_en_5.1.1_3.0_1694562884246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly06","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly06", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly06| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly06 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly07_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly07_en.md new file mode 100644 index 00000000000000..2dfc8a0c42b127 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly07_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly07 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly07 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly07` is an English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly07_en_5.1.1_3.0_1694563003244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly07_en_5.1.1_3.0_1694563003244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly07","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly07", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly07| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly07 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly08_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly08_en.md new file mode 100644 index 00000000000000..d82bd962260441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly08_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly08 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly08 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertjewdialdataallqonly08` is a English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly08_en_5.1.1_3.0_1694563138956.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly08_en_5.1.1_3.0_1694563138956.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly08","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly08", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly08| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly08 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly_en.md b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly_en.md new file mode 100644 index 00000000000000..80c0ad1a0239c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bertjewdialdataallqonly_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertjewdialdataallqonly` is a English model originally trained by Jeska. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly_en_5.1.1_3.0_1694562127211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly_en_5.1.1_3.0_1694562127211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-beto_clinical_wl_spanish_es.md b/docs/_posts/ahmedlone127/2023-09-12-beto_clinical_wl_spanish_es.md new file mode 100644 index 00000000000000..a8541213471dab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-beto_clinical_wl_spanish_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish beto_clinical_wl_spanish BertEmbeddings from plncmm +author: John Snow Labs +name: beto_clinical_wl_spanish +date: 2023-09-12 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`beto_clinical_wl_spanish` is a Castilian, Spanish model originally trained by plncmm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_clinical_wl_spanish_es_5.1.1_3.0_1694510336701.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_clinical_wl_spanish_es_5.1.1_3.0_1694510336701.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("beto_clinical_wl_spanish","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("beto_clinical_wl_spanish", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_clinical_wl_spanish| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.7 MB| + +## References + +https://huggingface.co/plncmm/beto-clinical-wl-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bio_bert_base_spanish_wwm_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-bio_bert_base_spanish_wwm_cased_en.md new file mode 100644 index 00000000000000..bbc180b3cb44a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bio_bert_base_spanish_wwm_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_bert_base_spanish_wwm_cased BertEmbeddings from mrojas +author: John Snow Labs +name: bio_bert_base_spanish_wwm_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bio_bert_base_spanish_wwm_cased` is a English model originally trained by mrojas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_bert_base_spanish_wwm_cased_en_5.1.1_3.0_1694556544912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_bert_base_spanish_wwm_cased_en_5.1.1_3.0_1694556544912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_bert_base_spanish_wwm_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_bert_base_spanish_wwm_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_bert_base_spanish_wwm_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.0 MB| + +## References + +https://huggingface.co/mrojas/bio-bert-base-spanish-wwm-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biobert_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-biobert_italian_en.md new file mode 100644 index 00000000000000..a0dc73da5299ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biobert_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_italian BertEmbeddings from marcopost-it +author: John Snow Labs +name: biobert_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_italian` is a English model originally trained by marcopost-it. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_italian_en_5.1.1_3.0_1694551323419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_italian_en_5.1.1_3.0_1694551323419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.2 MB| + +## References + +https://huggingface.co/marcopost-it/biobert-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biobert_patent_reference_extraction_en.md b/docs/_posts/ahmedlone127/2023-09-12-biobert_patent_reference_extraction_en.md new file mode 100644 index 00000000000000..3b583aca84bfbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biobert_patent_reference_extraction_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_patent_reference_extraction BertEmbeddings from kaesve +author: John Snow Labs +name: biobert_patent_reference_extraction +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_patent_reference_extraction` is a English model originally trained by kaesve. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_patent_reference_extraction_en_5.1.1_3.0_1694507994597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_patent_reference_extraction_en_5.1.1_3.0_1694507994597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_patent_reference_extraction","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_patent_reference_extraction", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_patent_reference_extraction| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/kaesve/BioBERT_patent_reference_extraction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biobert_v1.1_pubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-biobert_v1.1_pubmed_en.md new file mode 100644 index 00000000000000..b6795b741d65a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biobert_v1.1_pubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_v1.1_pubmed BertEmbeddings from monologg +author: John Snow Labs +name: biobert_v1.1_pubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biobert_v1.1_pubmed` is a English model originally trained by monologg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_v1.1_pubmed_en_5.1.1_3.0_1694554769599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_v1.1_pubmed_en_5.1.1_3.0_1694554769599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_v1.1_pubmed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_v1.1_pubmed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_v1.1_pubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/monologg/biobert_v1.1_pubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bioclinicalbert_finetuned_covid_papers_en.md b/docs/_posts/ahmedlone127/2023-09-12-bioclinicalbert_finetuned_covid_papers_en.md new file mode 100644 index 00000000000000..512beedd3752f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bioclinicalbert_finetuned_covid_papers_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioclinicalbert_finetuned_covid_papers BertEmbeddings from mrm8488 +author: John Snow Labs +name: bioclinicalbert_finetuned_covid_papers +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bioclinicalbert_finetuned_covid_papers` is a English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioclinicalbert_finetuned_covid_papers_en_5.1.1_3.0_1694556257175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioclinicalbert_finetuned_covid_papers_en_5.1.1_3.0_1694556257175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bioclinicalbert_finetuned_covid_papers","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bioclinicalbert_finetuned_covid_papers", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioclinicalbert_finetuned_covid_papers| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.2 MB| + +## References + +https://huggingface.co/mrm8488/bioclinicalBERT-finetuned-covid-papers \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_en.md b/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_en.md new file mode 100644 index 00000000000000..423700e0cb5575 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biomednlp_pubmedbert_base_uncased_abstract BertEmbeddings from microsoft +author: John Snow Labs +name: biomednlp_pubmedbert_base_uncased_abstract +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomednlp_pubmedbert_base_uncased_abstract` is a English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_en_5.1.1_3.0_1694553564368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_en_5.1.1_3.0_1694553564368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biomednlp_pubmedbert_base_uncased_abstract","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biomednlp_pubmedbert_base_uncased_abstract", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_base_uncased_abstract| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_fulltext_en.md b/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_fulltext_en.md new file mode 100644 index 00000000000000..1196d738573394 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-biomednlp_pubmedbert_base_uncased_abstract_fulltext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biomednlp_pubmedbert_base_uncased_abstract_fulltext BertEmbeddings from microsoft +author: John Snow Labs +name: biomednlp_pubmedbert_base_uncased_abstract_fulltext +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`biomednlp_pubmedbert_base_uncased_abstract_fulltext` is a English model originally trained by microsoft. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_fulltext_en_5.1.1_3.0_1694553416047.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomednlp_pubmedbert_base_uncased_abstract_fulltext_en_5.1.1_3.0_1694553416047.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biomednlp_pubmedbert_base_uncased_abstract_fulltext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biomednlp_pubmedbert_base_uncased_abstract_fulltext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomednlp_pubmedbert_base_uncased_abstract_fulltext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-bodo_bert_mlm_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-bodo_bert_mlm_base_en.md new file mode 100644 index 00000000000000..b20ee811c39217 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-bodo_bert_mlm_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bodo_bert_mlm_base BertEmbeddings from alayaran +author: John Snow Labs +name: bodo_bert_mlm_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bodo_bert_mlm_base` is a English model originally trained by alayaran. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bodo_bert_mlm_base_en_5.1.1_3.0_1694509384576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bodo_bert_mlm_base_en_5.1.1_3.0_1694509384576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bodo_bert_mlm_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bodo_bert_mlm_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bodo_bert_mlm_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/alayaran/bodo-bert-mlm-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-burmese_bert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-burmese_bert_mlm_en.md new file mode 100644 index 00000000000000..9cbed457538a52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-burmese_bert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_bert_mlm BertEmbeddings from minn +author: John Snow Labs +name: burmese_bert_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`burmese_bert_mlm` is a English model originally trained by minn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_bert_mlm_en_5.1.1_3.0_1694553778330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_bert_mlm_en_5.1.1_3.0_1694553778330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("burmese_bert_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_bert_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_bert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/minn/my-bert-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_note_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_note_en.md new file mode 100644 index 00000000000000..ffb6856020bd46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_note_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaladaptation_pubmedbert_base_uncased_mimic_note BertEmbeddings from jhliu +author: John Snow Labs +name: clinicaladaptation_pubmedbert_base_uncased_mimic_note +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaladaptation_pubmedbert_base_uncased_mimic_note` is an English model originally trained by jhliu. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_note_en_5.1.1_3.0_1694550817266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_note_en_5.1.1_3.0_1694550817266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_note","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_note", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaladaptation_pubmedbert_base_uncased_mimic_note| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/jhliu/ClinicalAdaptation-PubMedBERT-base-uncased-MIMIC-note \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en.md new file mode 100644 index 00000000000000..9b4dc43c91f411 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaladaptation_pubmedbert_base_uncased_mimic_segment BertEmbeddings from jhliu +author: John Snow Labs +name: clinicaladaptation_pubmedbert_base_uncased_mimic_segment +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaladaptation_pubmedbert_base_uncased_mimic_segment` is an English model originally trained by jhliu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en_5.1.1_3.0_1694550645740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_segment_en_5.1.1_3.0_1694550645740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_segment","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_segment", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaladaptation_pubmedbert_base_uncased_mimic_segment| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/jhliu/ClinicalAdaptation-PubMedBERT-base-uncased-MIMIC-segment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en.md new file mode 100644 index 00000000000000..e4b370235616db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaladaptation_pubmedbert_base_uncased_mimic_sentence BertEmbeddings from jhliu +author: John Snow Labs +name: clinicaladaptation_pubmedbert_base_uncased_mimic_sentence +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaladaptation_pubmedbert_base_uncased_mimic_sentence` is an English model originally trained by jhliu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en_5.1.1_3.0_1694550482919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaladaptation_pubmedbert_base_uncased_mimic_sentence_en_5.1.1_3.0_1694550482919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_sentence","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaladaptation_pubmedbert_base_uncased_mimic_sentence", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaladaptation_pubmedbert_base_uncased_mimic_sentence| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/jhliu/ClinicalAdaptation-PubMedBERT-base-uncased-MIMIC-sentence \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-clinicalnotebert_base_uncased_mimic_segment_note_en.md b/docs/_posts/ahmedlone127/2023-09-12-clinicalnotebert_base_uncased_mimic_segment_note_en.md new file mode 100644 index 00000000000000..b8f6d52173905d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-clinicalnotebert_base_uncased_mimic_segment_note_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicalnotebert_base_uncased_mimic_segment_note BertEmbeddings from jhliu +author: John Snow Labs +name: clinicalnotebert_base_uncased_mimic_segment_note +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicalnotebert_base_uncased_mimic_segment_note` is an English model originally trained by jhliu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalnotebert_base_uncased_mimic_segment_note_en_5.1.1_3.0_1694550310365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalnotebert_base_uncased_mimic_segment_note_en_5.1.1_3.0_1694550310365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicalnotebert_base_uncased_mimic_segment_note","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicalnotebert_base_uncased_mimic_segment_note", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalnotebert_base_uncased_mimic_segment_note| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/jhliu/ClinicalNoteBERT-base-uncased-MIMIC-segment-note \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-cord19_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-cord19_bert_en.md new file mode 100644 index 00000000000000..0b81945bf7d886 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-cord19_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cord19_bert BertEmbeddings from CovRelex-SE +author: John Snow Labs +name: cord19_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cord19_bert` is an English model originally trained by CovRelex-SE. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cord19_bert_en_5.1.1_3.0_1694559956829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cord19_bert_en_5.1.1_3.0_1694559956829.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cord19_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cord19_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cord19_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/CovRelex-SE/CORD19-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-covid19_fake_news_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-covid19_fake_news_bert_uncased_en.md new file mode 100644 index 00000000000000..a6af0e72f2bd6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-covid19_fake_news_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid19_fake_news_bert_uncased BertEmbeddings from Jawaher +author: John Snow Labs +name: covid19_fake_news_bert_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid19_fake_news_bert_uncased` is an English model originally trained by Jawaher. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid19_fake_news_bert_uncased_en_5.1.1_3.0_1694510515872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid19_fake_news_bert_uncased_en_5.1.1_3.0_1694510515872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("covid19_fake_news_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid19_fake_news_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid19_fake_news_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Jawaher/Covid19-fake-news-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-covid_scibert_en.md b/docs/_posts/ahmedlone127/2023-09-12-covid_scibert_en.md new file mode 100644 index 00000000000000..75b293db51c7a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-covid_scibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid_scibert BertEmbeddings from lordtt13 +author: John Snow Labs +name: covid_scibert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_scibert` is an English model originally trained by lordtt13. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_scibert_en_5.1.1_3.0_1694549378978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_scibert_en_5.1.1_3.0_1694549378978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("covid_scibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid_scibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_scibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/lordtt13/COVID-SciBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-covid_vaccine_twitter_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-covid_vaccine_twitter_bert_en.md new file mode 100644 index 00000000000000..2189a1b689145b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-covid_vaccine_twitter_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid_vaccine_twitter_bert BertEmbeddings from GateNLP +author: John Snow Labs +name: covid_vaccine_twitter_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_vaccine_twitter_bert` is an English model originally trained by GateNLP. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_vaccine_twitter_bert_en_5.1.1_3.0_1694553221558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_vaccine_twitter_bert_en_5.1.1_3.0_1694553221558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("covid_vaccine_twitter_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid_vaccine_twitter_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_vaccine_twitter_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/GateNLP/covid-vaccine-twitter-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-cro_cov_csebert_en.md b/docs/_posts/ahmedlone127/2023-09-12-cro_cov_csebert_en.md new file mode 100644 index 00000000000000..a82e91d1fe4222 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-cro_cov_csebert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cro_cov_csebert BertEmbeddings from InfoCoV +author: John Snow Labs +name: cro_cov_csebert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cro_cov_csebert` is an English model originally trained by InfoCoV. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cro_cov_csebert_en_5.1.1_3.0_1694559840041.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cro_cov_csebert_en_5.1.1_3.0_1694559840041.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cro_cov_csebert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cro_cov_csebert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cro_cov_csebert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|463.4 MB| + +## References + +https://huggingface.co/InfoCoV/Cro-CoV-cseBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-crosloengual_bert_hr.md b/docs/_posts/ahmedlone127/2023-09-12-crosloengual_bert_hr.md new file mode 100644 index 00000000000000..f3a4c1d30af803 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-crosloengual_bert_hr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Croatian crosloengual_bert BertEmbeddings from EMBEDDIA +author: John Snow Labs +name: crosloengual_bert +date: 2023-09-12 +tags: [bert, hr, open_source, fill_mask, onnx] +task: Embeddings +language: hr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `crosloengual_bert` is a Croatian model originally trained by EMBEDDIA. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crosloengual_bert_hr_5.1.1_3.0_1694509284745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crosloengual_bert_hr_5.1.1_3.0_1694509284745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("crosloengual_bert","hr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("crosloengual_bert", "hr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crosloengual_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hr| +|Size:|463.4 MB| + +## References + +https://huggingface.co/EMBEDDIA/crosloengual-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-custominlawbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-custominlawbert_en.md new file mode 100644 index 00000000000000..7cfed09f199915 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-custominlawbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English custominlawbert BertEmbeddings from law-ai +author: John Snow Labs +name: custominlawbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `custominlawbert` is an English model originally trained by law-ai. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/custominlawbert_en_5.1.1_3.0_1694553237351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/custominlawbert_en_5.1.1_3.0_1694553237351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("custominlawbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("custominlawbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|custominlawbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.5 MB| + +## References + +https://huggingface.co/law-ai/CustomInLawBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-cysecbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-cysecbert_en.md new file mode 100644 index 00000000000000..124d42903439a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-cysecbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cysecbert BertEmbeddings from markusbayer +author: John Snow Labs +name: cysecbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cysecbert` is an English model originally trained by markusbayer. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cysecbert_en_5.1.1_3.0_1694559903370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cysecbert_en_5.1.1_3.0_1694559903370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cysecbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cysecbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cysecbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/markusbayer/CySecBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dal_bert_finetuned_address_v1_en.md b/docs/_posts/ahmedlone127/2023-09-12-dal_bert_finetuned_address_v1_en.md new file mode 100644 index 00000000000000..7352a07970a0f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dal_bert_finetuned_address_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dal_bert_finetuned_address_v1 BertEmbeddings from IRI2070 +author: John Snow Labs +name: dal_bert_finetuned_address_v1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dal_bert_finetuned_address_v1` is an English model originally trained by IRI2070. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dal_bert_finetuned_address_v1_en_5.1.1_3.0_1694554981931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dal_bert_finetuned_address_v1_en_5.1.1_3.0_1694554981931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dal_bert_finetuned_address_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dal_bert_finetuned_address_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dal_bert_finetuned_address_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|432.1 MB| + +## References + +https://huggingface.co/IRI2070/dal-bert-finetuned-address-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-deberta_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-deberta_base_uncased_en.md new file mode 100644 index 00000000000000..44870658d01905 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-deberta_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English deberta_base_uncased BertEmbeddings from mlcorelib +author: John Snow Labs +name: deberta_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `deberta_base_uncased` is an English model originally trained by mlcorelib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/deberta_base_uncased_en_5.1.1_3.0_1694554096071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/deberta_base_uncased_en_5.1.1_3.0_1694554096071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("deberta_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("deberta_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|deberta_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/mlcorelib/deberta-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-divehi_labse_dv.md b/docs/_posts/ahmedlone127/2023-09-12-divehi_labse_dv.md new file mode 100644 index 00000000000000..7019823ece87a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-divehi_labse_dv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dhivehi, Divehi, Maldivian divehi_labse BertEmbeddings from monsoon-nlp +author: John Snow Labs +name: divehi_labse +date: 2023-09-12 +tags: [bert, dv, open_source, fill_mask, onnx] +task: Embeddings +language: dv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `divehi_labse` is a Dhivehi, Divehi, Maldivian model originally trained by monsoon-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/divehi_labse_dv_5.1.1_3.0_1694555287990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/divehi_labse_dv_5.1.1_3.0_1694555287990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("divehi_labse","dv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("divehi_labse", "dv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|divehi_labse| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|dv| +|Size:|1.9 GB| + +## References + +https://huggingface.co/monsoon-nlp/dv-labse \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-divehi_muril_dv.md b/docs/_posts/ahmedlone127/2023-09-12-divehi_muril_dv.md new file mode 100644 index 00000000000000..7b13bb38cf35a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-divehi_muril_dv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Dhivehi, Divehi, Maldivian divehi_muril BertEmbeddings from monsoon-nlp +author: John Snow Labs +name: divehi_muril +date: 2023-09-12 +tags: [bert, dv, open_source, fill_mask, onnx] +task: Embeddings +language: dv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `divehi_muril` is a Dhivehi, Divehi, Maldivian model originally trained by monsoon-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/divehi_muril_dv_5.1.1_3.0_1694555552470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/divehi_muril_dv_5.1.1_3.0_1694555552470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("divehi_muril","dv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("divehi_muril", "dv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|divehi_muril| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|dv| +|Size:|919.1 MB| + +## References + +https://huggingface.co/monsoon-nlp/dv-muril \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_en.md new file mode 100644 index 00000000000000..3d73ff9eaffdb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm BertEmbeddings from bayartsogt +author: John Snow Labs +name: dlub_2022_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm` is an English model originally trained by bayartsogt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_en_5.1.1_3.0_1694555892646.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_en_5.1.1_3.0_1694555892646.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/bayartsogt/dlub-2022-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_gansukh_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_gansukh_en.md new file mode 100644 index 00000000000000..c1b12f7c896339 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_gansukh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_gansukh BertEmbeddings from Gansukh +author: John Snow Labs +name: dlub_2022_mlm_full_gansukh +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm_full_gansukh` is an English model originally trained by Gansukh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_gansukh_en_5.1.1_3.0_1694555350358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_gansukh_en_5.1.1_3.0_1694555350358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm_full_gansukh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_gansukh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_gansukh| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/Gansukh/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_ganzorig_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_ganzorig_en.md new file mode 100644 index 00000000000000..8b063a7d5ded53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_ganzorig_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_ganzorig BertEmbeddings from ganzorig +author: John Snow Labs +name: dlub_2022_mlm_full_ganzorig +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm_full_ganzorig` is an English model originally trained by ganzorig. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_ganzorig_en_5.1.1_3.0_1694555674512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_ganzorig_en_5.1.1_3.0_1694555674512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm_full_ganzorig","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_ganzorig", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_ganzorig| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/ganzorig/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_omunkhuush_en.md b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_omunkhuush_en.md new file mode 100644 index 00000000000000..9085341fd867d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dlub_2022_mlm_full_omunkhuush_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_omunkhuush BertEmbeddings from omunkhuush +author: John Snow Labs +name: dlub_2022_mlm_full_omunkhuush +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dlub_2022_mlm_full_omunkhuush` is an English model originally trained by omunkhuush. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_omunkhuush_en_5.1.1_3.0_1694555506851.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_omunkhuush_en_5.1.1_3.0_1694555506851.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dlub_2022_mlm_full_omunkhuush","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_omunkhuush", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_omunkhuush| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/omunkhuush/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dmis_bertpubmed_en.md b/docs/_posts/ahmedlone127/2023-09-12-dmis_bertpubmed_en.md new file mode 100644 index 00000000000000..d08f5b31244e6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dmis_bertpubmed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dmis_bertpubmed BertEmbeddings from abnuel +author: John Snow Labs +name: dmis_bertpubmed +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dmis_bertpubmed` is an English model originally trained by abnuel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dmis_bertpubmed_en_5.1.1_3.0_1694561812368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dmis_bertpubmed_en_5.1.1_3.0_1694561812368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dmis_bertpubmed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dmis_bertpubmed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dmis_bertpubmed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/abnuel/dmis_bertpubmed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dpr_passage_uned_en.md b/docs/_posts/ahmedlone127/2023-09-12-dpr_passage_uned_en.md new file mode 100644 index 00000000000000..e7ce09605c4ebe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dpr_passage_uned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_passage_uned BertEmbeddings from avacaondata +author: John Snow Labs +name: dpr_passage_uned +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_passage_uned` is an English model originally trained by avacaondata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_passage_uned_en_5.1.1_3.0_1694554318636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_passage_uned_en_5.1.1_3.0_1694554318636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_passage_uned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_passage_uned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_passage_uned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/avacaondata/dpr-passage-uned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dpr_query_uned_en.md b/docs/_posts/ahmedlone127/2023-09-12-dpr_query_uned_en.md new file mode 100644 index 00000000000000..1e3272bd6f9bcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dpr_query_uned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_query_uned BertEmbeddings from avacaondata +author: John Snow Labs +name: dpr_query_uned +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_query_uned` is an English model originally trained by avacaondata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_query_uned_en_5.1.1_3.0_1694554164070.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_query_uned_en_5.1.1_3.0_1694554164070.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_query_uned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_query_uned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_query_uned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/avacaondata/dpr-query-uned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-drbert_4gb_cp_pubmedbert_fr.md b/docs/_posts/ahmedlone127/2023-09-12-drbert_4gb_cp_pubmedbert_fr.md new file mode 100644 index 00000000000000..50d27a903f93f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-drbert_4gb_cp_pubmedbert_fr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: French drbert_4gb_cp_pubmedbert BertEmbeddings from Dr-BERT +author: John Snow Labs +name: drbert_4gb_cp_pubmedbert +date: 2023-09-12 +tags: [bert, fr, open_source, fill_mask, onnx] +task: Embeddings +language: fr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `drbert_4gb_cp_pubmedbert` is a French model originally trained by Dr-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/drbert_4gb_cp_pubmedbert_fr_5.1.1_3.0_1694550608539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/drbert_4gb_cp_pubmedbert_fr_5.1.1_3.0_1694550608539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("drbert_4gb_cp_pubmedbert","fr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("drbert_4gb_cp_pubmedbert", "fr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|drbert_4gb_cp_pubmedbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fr| +|Size:|408.2 MB| + +## References + +https://huggingface.co/Dr-BERT/DrBERT-4GB-CP-PubMedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-drclips_en.md b/docs/_posts/ahmedlone127/2023-09-12-drclips_en.md new file mode 100644 index 00000000000000..4a0d7ba74ff95d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-drclips_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English drclips BertEmbeddings from maximedb +author: John Snow Labs +name: drclips +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `drclips` is an English model originally trained by maximedb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/drclips_en_5.1.1_3.0_1694552359667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/drclips_en_5.1.1_3.0_1694552359667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("drclips","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("drclips", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|drclips| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/maximedb/drclips \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dummy_model_arthuerwang_en.md b/docs/_posts/ahmedlone127/2023-09-12-dummy_model_arthuerwang_en.md new file mode 100644 index 00000000000000..e21a27e6031c61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dummy_model_arthuerwang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_arthuerwang BertEmbeddings from Arthuerwang +author: John Snow Labs +name: dummy_model_arthuerwang +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_arthuerwang` is an English model originally trained by Arthuerwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_arthuerwang_en_5.1.1_3.0_1694509846009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_arthuerwang_en_5.1.1_3.0_1694509846009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_arthuerwang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_arthuerwang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_arthuerwang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Arthuerwang/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-dummy_model_bigtimecodersean_en.md b/docs/_posts/ahmedlone127/2023-09-12-dummy_model_bigtimecodersean_en.md new file mode 100644 index 00000000000000..698d345c4e10f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-dummy_model_bigtimecodersean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_bigtimecodersean BertEmbeddings from BigTimeCoderSean +author: John Snow Labs +name: dummy_model_bigtimecodersean +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_bigtimecodersean` is an English model originally trained by BigTimeCoderSean. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_bigtimecodersean_en_5.1.1_3.0_1694557914292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_bigtimecodersean_en_5.1.1_3.0_1694557914292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_bigtimecodersean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_bigtimecodersean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_bigtimecodersean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/BigTimeCoderSean/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-efficient_splade_vi_bt_large_query_en.md b/docs/_posts/ahmedlone127/2023-09-12-efficient_splade_vi_bt_large_query_en.md new file mode 100644 index 00000000000000..c03dfb7943213b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-efficient_splade_vi_bt_large_query_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English efficient_splade_vi_bt_large_query BertEmbeddings from naver +author: John Snow Labs +name: efficient_splade_vi_bt_large_query +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `efficient_splade_vi_bt_large_query` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_splade_vi_bt_large_query_en_5.1.1_3.0_1694557934858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_splade_vi_bt_large_query_en_5.1.1_3.0_1694557934858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("efficient_splade_vi_bt_large_query","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("efficient_splade_vi_bt_large_query", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_splade_vi_bt_large_query| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/naver/efficient-splade-VI-BT-large-query \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-europarl_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-europarl_mlm_en.md new file mode 100644 index 00000000000000..aba29d2d6dfab6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-europarl_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English europarl_mlm BertEmbeddings from lukabor +author: John Snow Labs +name: europarl_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `europarl_mlm` is an English model originally trained by lukabor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/europarl_mlm_en_5.1.1_3.0_1694550101034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/europarl_mlm_en_5.1.1_3.0_1694550101034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("europarl_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("europarl_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|europarl_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/lukabor/europarl-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-finbert_pretrain_financeinc_en.md b/docs/_posts/ahmedlone127/2023-09-12-finbert_pretrain_financeinc_en.md new file mode 100644 index 00000000000000..ca737e852f38be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-finbert_pretrain_financeinc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finbert_pretrain_financeinc BertEmbeddings from FinanceInc +author: John Snow Labs +name: finbert_pretrain_financeinc +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert_pretrain_financeinc` is an English model originally trained by FinanceInc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_pretrain_financeinc_en_5.1.1_3.0_1694562283178.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_pretrain_financeinc_en_5.1.1_3.0_1694562283178.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finbert_pretrain_financeinc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finbert_pretrain_financeinc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_pretrain_financeinc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/FinanceInc/finbert-pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-finest_bert_fi.md b/docs/_posts/ahmedlone127/2023-09-12-finest_bert_fi.md new file mode 100644 index 00000000000000..2a96b2bf931a91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-finest_bert_fi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Finnish finest_bert BertEmbeddings from EMBEDDIA +author: John Snow Labs +name: finest_bert +date: 2023-09-12 +tags: [bert, fi, open_source, fill_mask, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finest_bert` is a Finnish model originally trained by EMBEDDIA. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finest_bert_fi_5.1.1_3.0_1694509453012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finest_bert_fi_5.1.1_3.0_1694509453012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finest_bert","fi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finest_bert", "fi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finest_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fi| +|Size:|535.1 MB| + +## References + +https://huggingface.co/EMBEDDIA/finest-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-finetuned_bert_base_multilingual_cased_noisy_english_malay_xx.md b/docs/_posts/ahmedlone127/2023-09-12-finetuned_bert_base_multilingual_cased_noisy_english_malay_xx.md new file mode 100644 index 00000000000000..e2d475104f3bde --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-finetuned_bert_base_multilingual_cased_noisy_english_malay_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual finetuned_bert_base_multilingual_cased_noisy_english_malay BertEmbeddings from mesolitica +author: John Snow Labs +name: finetuned_bert_base_multilingual_cased_noisy_english_malay +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_bert_base_multilingual_cased_noisy_english_malay` is a Multilingual model originally trained by mesolitica. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_bert_base_multilingual_cased_noisy_english_malay_xx_5.1.1_3.0_1694556066679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_bert_base_multilingual_cased_noisy_english_malay_xx_5.1.1_3.0_1694556066679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finetuned_bert_base_multilingual_cased_noisy_english_malay","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finetuned_bert_base_multilingual_cased_noisy_english_malay", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_bert_base_multilingual_cased_noisy_english_malay| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|557.7 MB| + +## References + +https://huggingface.co/mesolitica/finetuned-bert-base-multilingual-cased-noisy-en-ms \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-finetuned_test_1_en.md b/docs/_posts/ahmedlone127/2023-09-12-finetuned_test_1_en.md new file mode 100644 index 00000000000000..2f7df4ad42604e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-finetuned_test_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finetuned_test_1 BertEmbeddings from ariesutiono +author: John Snow Labs +name: finetuned_test_1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finetuned_test_1` is an English model originally trained by ariesutiono. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_test_1_en_5.1.1_3.0_1694559280850.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_test_1_en_5.1.1_3.0_1694559280850.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finetuned_test_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finetuned_test_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_test_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ariesutiono/finetuned-test-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-firmanbrilianbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-firmanbrilianbert_en.md new file mode 100644 index 00000000000000..67e9989d6439a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-firmanbrilianbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English firmanbrilianbert BertEmbeddings from FirmanBr +author: John Snow Labs +name: firmanbrilianbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `firmanbrilianbert` is an English model originally trained by FirmanBr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/firmanbrilianbert_en_5.1.1_3.0_1694548253279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/firmanbrilianbert_en_5.1.1_3.0_1694548253279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("firmanbrilianbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("firmanbrilianbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|firmanbrilianbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/FirmanBr/FirmanBrilianBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-flang_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-flang_bert_en.md new file mode 100644 index 00000000000000..d804ac64ea7551 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-flang_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English flang_bert BertEmbeddings from SALT-NLP +author: John Snow Labs +name: flang_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `flang_bert` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flang_bert_en_5.1.1_3.0_1694553487024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flang_bert_en_5.1.1_3.0_1694553487024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("flang_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("flang_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flang_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-flang_spanbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-flang_spanbert_en.md new file mode 100644 index 00000000000000..b79cb06b88f94a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-flang_spanbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English flang_spanbert BertEmbeddings from SALT-NLP +author: John Snow Labs +name: flang_spanbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `flang_spanbert` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flang_spanbert_en_5.1.1_3.0_1694553636224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flang_spanbert_en_5.1.1_3.0_1694553636224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("flang_spanbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("flang_spanbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flang_spanbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-SpanBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-gbert_large_en.md b/docs/_posts/ahmedlone127/2023-09-12-gbert_large_en.md new file mode 100644 index 00000000000000..ef130a63b743b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-gbert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gbert_large BertEmbeddings from Anjoe +author: John Snow Labs +name: gbert_large +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gbert_large` is an English model originally trained by Anjoe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gbert_large_en_5.1.1_3.0_1694548898842.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gbert_large_en_5.1.1_3.0_1694548898842.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("gbert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("gbert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gbert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Anjoe/gbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hatebert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hatebert_en.md new file mode 100644 index 00000000000000..816b594a5abb7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hatebert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hatebert BertEmbeddings from GroNLP +author: John Snow Labs +name: hatebert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hatebert` is an English model originally trained by GroNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hatebert_en_5.1.1_3.0_1694559158477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hatebert_en_5.1.1_3.0_1694559158477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hatebert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hatebert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hatebert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.1 MB| + +## References + +https://huggingface.co/GroNLP/hateBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hinglish_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hinglish_bert_en.md new file mode 100644 index 00000000000000..5701ea912237f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hinglish_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_bert BertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_bert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_bert_en_5.1.1_3.0_1694552706521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_bert_en_5.1.1_3.0_1694552706521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hinglish_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hinglish_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/meghanabhange/Hinglish-Bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hinglish_sbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sbert_en.md new file mode 100644 index 00000000000000..7cf1830542349a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_sbert BertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_sbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_sbert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_sbert_en_5.1.1_3.0_1694552855686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_sbert_en_5.1.1_3.0_1694552855686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hinglish_sbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hinglish_sbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_sbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/meghanabhange/hinglish-sbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hinglish_sentence_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sentence_bert_en.md new file mode 100644 index 00000000000000..f4fd57510a2475 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hinglish_sentence_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_sentence_bert BertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_sentence_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_sentence_bert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_sentence_bert_en_5.1.1_3.0_1694553037722.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_sentence_bert_en_5.1.1_3.0_1694553037722.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hinglish_sentence_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hinglish_sentence_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_sentence_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/meghanabhange/hinglish-sentence-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hm_model001_en.md b/docs/_posts/ahmedlone127/2023-09-12-hm_model001_en.md new file mode 100644 index 00000000000000..cb307dedd6ecb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hm_model001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hm_model001 BertEmbeddings from FAN-L +author: John Snow Labs +name: hm_model001 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hm_model001` is an English model originally trained by FAN-L. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hm_model001_en_5.1.1_3.0_1694547885453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hm_model001_en_5.1.1_3.0_1694547885453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hm_model001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hm_model001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hm_model001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/FAN-L/HM_model001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hubertkl_en.md b/docs/_posts/ahmedlone127/2023-09-12-hubertkl_en.md new file mode 100644 index 00000000000000..19ba053bf5913d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hubertkl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hubertkl BertEmbeddings from SzegedAI +author: John Snow Labs +name: hubertkl +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hubertkl` is an English model originally trained by SzegedAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubertkl_en_5.1.1_3.0_1694550469948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubertkl_en_5.1.1_3.0_1694550469948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hubertkl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hubertkl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubertkl| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/SzegedAI/HuBERTkl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-hubertmlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-hubertmlm_en.md new file mode 100644 index 00000000000000..f6acb129c6dfad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-hubertmlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hubertmlm BertEmbeddings from SzegedAI +author: John Snow Labs +name: hubertmlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hubertmlm` is an English model originally trained by SzegedAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hubertmlm_en_5.1.1_3.0_1694550296332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hubertmlm_en_5.1.1_3.0_1694550296332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("hubertmlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("hubertmlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hubertmlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/SzegedAI/HuBERTmlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-indojave_codemixed_indobert_base_id.md b/docs/_posts/ahmedlone127/2023-09-12-indojave_codemixed_indobert_base_id.md new file mode 100644 index 00000000000000..9ca84832e77dc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-indojave_codemixed_indobert_base_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian indojave_codemixed_indobert_base BertEmbeddings from fathan +author: John Snow Labs +name: indojave_codemixed_indobert_base +date: 2023-09-12 +tags: [bert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indojave_codemixed_indobert_base` is an Indonesian model originally trained by fathan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indojave_codemixed_indobert_base_id_5.1.1_3.0_1694549511841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indojave_codemixed_indobert_base_id_5.1.1_3.0_1694549511841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("indojave_codemixed_indobert_base","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("indojave_codemixed_indobert_base", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indojave_codemixed_indobert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|411.5 MB| + +## References + +https://huggingface.co/fathan/indojave-codemixed-indobert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-jobbert_german_de.md b/docs/_posts/ahmedlone127/2023-09-12-jobbert_german_de.md new file mode 100644 index 00000000000000..98ccda4cbfe005 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-jobbert_german_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German jobbert_german BertEmbeddings from agne +author: John Snow Labs +name: jobbert_german +date: 2023-09-12 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jobbert_german` is a German model originally trained by agne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jobbert_german_de_5.1.1_3.0_1694508954522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jobbert_german_de_5.1.1_3.0_1694508954522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("jobbert_german","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("jobbert_german", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jobbert_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/agne/jobBERT-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-jobgbert_de.md b/docs/_posts/ahmedlone127/2023-09-12-jobgbert_de.md new file mode 100644 index 00000000000000..5bcbf8219d5633 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-jobgbert_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German jobgbert BertEmbeddings from agne +author: John Snow Labs +name: jobgbert +date: 2023-09-12 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jobgbert` is a German model originally trained by agne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jobgbert_de_5.1.1_3.0_1694509133987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jobgbert_de_5.1.1_3.0_1694509133987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("jobgbert","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("jobgbert", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jobgbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|409.5 MB| + +## References + +https://huggingface.co/agne/jobGBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-jurisbert_base_portuguese_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-jurisbert_base_portuguese_uncased_en.md new file mode 100644 index 00000000000000..b30d0d72a3aeb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-jurisbert_base_portuguese_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English jurisbert_base_portuguese_uncased BertEmbeddings from alfaneo +author: John Snow Labs +name: jurisbert_base_portuguese_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jurisbert_base_portuguese_uncased` is an English model originally trained by alfaneo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jurisbert_base_portuguese_uncased_en_5.1.1_3.0_1694556436181.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jurisbert_base_portuguese_uncased_en_5.1.1_3.0_1694556436181.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column BertEmbeddings reads + +embeddings = BertEmbeddings.pretrained("jurisbert_base_portuguese_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column BertEmbeddings reads + +val embeddings = BertEmbeddings + .pretrained("jurisbert_base_portuguese_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jurisbert_base_portuguese_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/alfaneo/jurisbert-base-portuguese-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-k_12bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-k_12bert_en.md new file mode 100644 index 00000000000000..f9372b7d4b0227 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-k_12bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English k_12bert BertEmbeddings from vasugoel +author: John Snow Labs +name: k_12bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `k_12bert` is an English model originally trained by vasugoel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/k_12bert_en_5.1.1_3.0_1694557848064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/k_12bert_en_5.1.1_3.0_1694557848064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("k_12bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("k_12bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|k_12bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/vasugoel/K-12BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kbbert_distilled_cased_sv.md b/docs/_posts/ahmedlone127/2023-09-12-kbbert_distilled_cased_sv.md new file mode 100644 index 00000000000000..a9b5c5b7c321c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kbbert_distilled_cased_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish kbbert_distilled_cased BertEmbeddings from Addedk +author: John Snow Labs +name: kbbert_distilled_cased +date: 2023-09-12 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kbbert_distilled_cased` is a Swedish model originally trained by Addedk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kbbert_distilled_cased_sv_5.1.1_3.0_1694550965718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kbbert_distilled_cased_sv_5.1.1_3.0_1694550965718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kbbert_distilled_cased","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kbbert_distilled_cased", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kbbert_distilled_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|305.8 MB| + +## References + +https://huggingface.co/Addedk/kbbert-distilled-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_1e_en.md new file mode 100644 index 00000000000000..66ed603ae401b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkmultbert_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkmultbert_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkmultbert_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkmultbert_1e_en_5.1.1_3.0_1694553093201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkmultbert_1e_en_5.1.1_3.0_1694553093201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kkmultbert_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkmultbert_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkmultbert_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.3 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKMultBert_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_2e_en.md new file mode 100644 index 00000000000000..16e41a1f5b7257 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkmultbert_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkmultbert_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkmultbert_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkmultbert_2e_en_5.1.1_3.0_1694553427917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkmultbert_2e_en_5.1.1_3.0_1694553427917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kkmultbert_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkmultbert_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkmultbert_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.2 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKMultBert_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_3e_en.md new file mode 100644 index 00000000000000..b4eed81ec9c53b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkmultbert_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkmultbert_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkmultbert_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkmultbert_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkmultbert_3e_en_5.1.1_3.0_1694553624770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkmultbert_3e_en_5.1.1_3.0_1694553624770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kkmultbert_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkmultbert_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkmultbert_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKMultBert_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_1e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_1e_en.md new file mode 100644 index 00000000000000..5ef92c1e47d453 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkturkbert_bynaumen_1e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkturkbert_bynaumen_1e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkturkbert_bynaumen_1e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_1e_en_5.1.1_3.0_1694559139710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_1e_en_5.1.1_3.0_1694559139710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kkturkbert_bynaumen_1e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkturkbert_bynaumen_1e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkturkbert_bynaumen_1e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.0 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKTurkBert_byNAUMEN_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_2e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_2e_en.md new file mode 100644 index 00000000000000..79fbc1e527c3e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_2e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkturkbert_bynaumen_2e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkturkbert_bynaumen_2e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkturkbert_bynaumen_2e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_2e_en_5.1.1_3.0_1694558924867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_2e_en_5.1.1_3.0_1694558924867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kkturkbert_bynaumen_2e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkturkbert_bynaumen_2e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkturkbert_bynaumen_2e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.6 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKTurkBert_byNAUMEN_2e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_3e_en.md new file mode 100644 index 00000000000000..b6c076196dbbe0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kkturkbert_bynaumen_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kkturkbert_bynaumen_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: kkturkbert_bynaumen_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kkturkbert_bynaumen_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_3e_en_5.1.1_3.0_1694558723631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kkturkbert_bynaumen_3e_en_5.1.1_3.0_1694558723631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kkturkbert_bynaumen_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kkturkbert_bynaumen_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kkturkbert_bynaumen_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|493.0 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/KKTurkBert_byNAUMEN_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-korean_mathbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-korean_mathbert_en.md new file mode 100644 index 00000000000000..9c7ef7b8dc8b8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-korean_mathbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English korean_mathbert BertEmbeddings from jnsulee +author: John Snow Labs +name: korean_mathbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `korean_mathbert` is an English model originally trained by jnsulee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/korean_mathbert_en_5.1.1_3.0_1694553539059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/korean_mathbert_en_5.1.1_3.0_1694553539059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("korean_mathbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("korean_mathbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|korean_mathbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/jnsulee/ko-mathbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_keyword_sentence_10000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_keyword_sentence_10000_0.0003_en.md new file mode 100644 index 00000000000000..eaf877612e1c35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_keyword_sentence_10000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_keyword_sentence_10000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_keyword_sentence_10000_0.0003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_keyword_sentence_10000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_keyword_sentence_10000_0.0003_en_5.1.1_3.0_1694548488337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_keyword_sentence_10000_0.0003_en_5.1.1_3.0_1694548488337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kw_pubmed_keyword_sentence_10000_0.0003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_keyword_sentence_10000_0.0003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_keyword_sentence_10000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_keyword_sentence_10000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_2_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_2_en.md new file mode 100644 index 00000000000000..125180ba2abf59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_document_10000_0.0003_2 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_document_10000_0.0003_2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_document_10000_0.0003_2` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_document_10000_0.0003_2_en_5.1.1_3.0_1694553795601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_document_10000_0.0003_2_en_5.1.1_3.0_1694553795601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kw_pubmed_vanilla_document_10000_0.0003_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_document_10000_0.0003_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_document_10000_0.0003_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_document_10000_0.0003_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_en.md new file mode 100644 index 00000000000000..9eb8a8f55f5076 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_document_10000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_document_10000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_document_10000_0.0003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_document_10000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_document_10000_0.0003_en_5.1.1_3.0_1694548616966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_document_10000_0.0003_en_5.1.1_3.0_1694548616966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kw_pubmed_vanilla_document_10000_0.0003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_document_10000_0.0003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_document_10000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_document_10000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_2_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_2_en.md new file mode 100644 index 00000000000000..3a43754a499d0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_sentence_10000_0.0003_2 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_sentence_10000_0.0003_2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_sentence_10000_0.0003_2` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_2_en_5.1.1_3.0_1694553337510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_2_en_5.1.1_3.0_1694553337510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kw_pubmed_vanilla_sentence_10000_0.0003_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_sentence_10000_0.0003_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_sentence_10000_0.0003_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_sentence_10000_0.0003_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_en.md new file mode 100644 index 00000000000000..9406230adc3229 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kw_pubmed_vanilla_sentence_10000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_vanilla_sentence_10000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_vanilla_sentence_10000_0.0003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_vanilla_sentence_10000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_en_5.1.1_3.0_1694548333195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_vanilla_sentence_10000_0.0003_en_5.1.1_3.0_1694548333195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kw_pubmed_vanilla_sentence_10000_0.0003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kw_pubmed_vanilla_sentence_10000_0.0003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_vanilla_sentence_10000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_vanilla_sentence_10000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-kykim_bert_kor_base_korean_en.md b/docs/_posts/ahmedlone127/2023-09-12-kykim_bert_kor_base_korean_en.md new file mode 100644 index 00000000000000..e391d45933b48a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-kykim_bert_kor_base_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kykim_bert_kor_base_korean BertEmbeddings from koala +author: John Snow Labs +name: kykim_bert_kor_base_korean +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kykim_bert_kor_base_korean` is an English model originally trained by koala. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kykim_bert_kor_base_korean_en_5.1.1_3.0_1694510273861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kykim_bert_kor_base_korean_en_5.1.1_3.0_1694510273861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("kykim_bert_kor_base_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kykim_bert_kor_base_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kykim_bert_kor_base_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/koala/kykim-bert-kor-base-ko \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legal_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-legal_bert_base_uncased_en.md new file mode 100644 index 00000000000000..f6ab8e6b0aea41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legal_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_bert_base_uncased BertEmbeddings from nlpaueb +author: John Snow Labs +name: legal_bert_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bert_base_uncased` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bert_base_uncased_en_5.1.1_3.0_1694561549951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bert_base_uncased_en_5.1.1_3.0_1694561549951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("legal_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/nlpaueb/legal-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legal_bert_small_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-legal_bert_small_uncased_en.md new file mode 100644 index 00000000000000..433d1f39c8066a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legal_bert_small_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_bert_small_uncased BertEmbeddings from nlpaueb +author: John Snow Labs +name: legal_bert_small_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bert_small_uncased` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bert_small_uncased_en_5.1.1_3.0_1694561644609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bert_small_uncased_en_5.1.1_3.0_1694561644609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("legal_bert_small_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_bert_small_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bert_small_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|130.6 MB| + +## References + +https://huggingface.co/nlpaueb/legal-bert-small-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legal_bertimbau_large_pt.md b/docs/_posts/ahmedlone127/2023-09-12-legal_bertimbau_large_pt.md new file mode 100644 index 00000000000000..2b9ea254a999c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legal_bertimbau_large_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese legal_bertimbau_large BertEmbeddings from rufimelo +author: John Snow Labs +name: legal_bertimbau_large +date: 2023-09-12 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bertimbau_large` is a Portuguese model originally trained by rufimelo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bertimbau_large_pt_5.1.1_3.0_1694562841145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bertimbau_large_pt_5.1.1_3.0_1694562841145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("legal_bertimbau_large","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_bertimbau_large", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bertimbau_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/rufimelo/Legal-BERTimbau-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_fp_en.md b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_fp_en.md new file mode 100644 index 00000000000000..9f8486bd7aaeb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_fp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbertpt_fp BertEmbeddings from raquelsilveira +author: John Snow Labs +name: legalbertpt_fp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbertpt_fp` is an English model originally trained by raquelsilveira. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbertpt_fp_en_5.1.1_3.0_1694549902826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbertpt_fp_en_5.1.1_3.0_1694549902826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("legalbertpt_fp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legalbertpt_fp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbertpt_fp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.8 MB| + +## References + +https://huggingface.co/raquelsilveira/legalbertpt_fp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_sardinian_en.md b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_sardinian_en.md new file mode 100644 index 00000000000000..bfc82201ef1ea0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-legalbertpt_sardinian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbertpt_sardinian BertEmbeddings from raquelsilveira +author: John Snow Labs +name: legalbertpt_sardinian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbertpt_sardinian` is an English model originally trained by raquelsilveira. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbertpt_sardinian_en_5.1.1_3.0_1694550099835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbertpt_sardinian_en_5.1.1_3.0_1694550099835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("legalbertpt_sardinian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legalbertpt_sardinian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbertpt_sardinian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|425.1 MB| + +## References + +https://huggingface.co/raquelsilveira/legalbertpt_sc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-lernnavi_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-lernnavi_bert_en.md new file mode 100644 index 00000000000000..65ebce65f7dec0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-lernnavi_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lernnavi_bert BertEmbeddings from lucazed +author: John Snow Labs +name: lernnavi_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lernnavi_bert` is an English model originally trained by lucazed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lernnavi_bert_en_5.1.1_3.0_1694561382262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lernnavi_bert_en_5.1.1_3.0_1694561382262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("lernnavi_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lernnavi_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lernnavi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/lucazed/lernnavi_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-lf_model_01_en.md b/docs/_posts/ahmedlone127/2023-09-12-lf_model_01_en.md new file mode 100644 index 00000000000000..93273c267fa6f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-lf_model_01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lf_model_01 BertEmbeddings from lf +author: John Snow Labs +name: lf_model_01 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lf_model_01` is an English model originally trained by lf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lf_model_01_en_5.1.1_3.0_1694548430541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lf_model_01_en_5.1.1_3.0_1694548430541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("lf_model_01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lf_model_01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lf_model_01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lf/lf_model_01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-lm_financial_v2_en.md b/docs/_posts/ahmedlone127/2023-09-12-lm_financial_v2_en.md new file mode 100644 index 00000000000000..06a7fec6d55cc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-lm_financial_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lm_financial_v2 BertEmbeddings from anablasi +author: John Snow Labs +name: lm_financial_v2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lm_financial_v2` is an English model originally trained by anablasi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lm_financial_v2_en_5.1.1_3.0_1694551202277.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lm_financial_v2_en_5.1.1_3.0_1694551202277.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("lm_financial_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lm_financial_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lm_financial_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/anablasi/lm_financial_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-logion_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-logion_base_en.md new file mode 100644 index 00000000000000..a492b8bb4662be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-logion_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English logion_base BertEmbeddings from cabrooks +author: John Snow Labs +name: logion_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `logion_base` is an English model originally trained by cabrooks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/logion_base_en_5.1.1_3.0_1694551525035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/logion_base_en_5.1.1_3.0_1694551525035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("logion_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("logion_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|logion_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.8 MB| + +## References + +https://huggingface.co/cabrooks/LOGION-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-m3_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-m3_mlm_en.md new file mode 100644 index 00000000000000..5eef13ec6a635a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-m3_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_mlm BertEmbeddings from S2312dal +author: John Snow Labs +name: m3_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_mlm` is an English model originally trained by S2312dal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_mlm_en_5.1.1_3.0_1694551397577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_mlm_en_5.1.1_3.0_1694551397577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("m3_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("m3_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.9 MB| + +## References + +https://huggingface.co/S2312dal/M3_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-m6_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-12-m6_mlm_en.md new file mode 100644 index 00000000000000..e853cd0fe38210 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-m6_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m6_mlm BertEmbeddings from S2312dal +author: John Snow Labs +name: m6_mlm +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m6_mlm` is an English model originally trained by S2312dal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m6_mlm_en_5.1.1_3.0_1694551618334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m6_mlm_en_5.1.1_3.0_1694551618334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("m6_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("m6_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m6_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/S2312dal/M6_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-marathi_bert_mr.md b/docs/_posts/ahmedlone127/2023-09-12-marathi_bert_mr.md new file mode 100644 index 00000000000000..feebdcd8cbd918 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-marathi_bert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert +date: 2023-09-12 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_bert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_mr_5.1.1_3.0_1694547462490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_mr_5.1.1_3.0_1694547462490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_bert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marathi_bert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|665.1 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_cased_en.md new file mode 100644 index 00000000000000..c4c0a9c117f5a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_cont_cased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_cont_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_cont_cased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_cased_en_5.1.1_3.0_1694508985703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_cased_en_5.1.1_3.0_1694508985703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_cont_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_cont_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_cont_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.9 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-cont-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_uncased_en.md new file mode 100644 index 00000000000000..9a37d284962cf0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_cont_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_cont_uncased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_cont_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_cont_uncased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_uncased_en_5.1.1_3.0_1694509108326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_cont_uncased_en_5.1.1_3.0_1694509108326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_cont_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_cont_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_cont_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-cont-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_cased_en.md new file mode 100644 index 00000000000000..316c787c916fe5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_scr_cased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_scr_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_scr_cased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_cased_en_5.1.1_3.0_1694508731836.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_cased_en_5.1.1_3.0_1694508731836.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_scr_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_scr_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_scr_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.1 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-scr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_uncased_en.md new file mode 100644 index 00000000000000..e007524b59d832 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-masked_2ktweets_conflibert_scr_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_2ktweets_conflibert_scr_uncased BertEmbeddings from ipadmanaban +author: John Snow Labs +name: masked_2ktweets_conflibert_scr_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_2ktweets_conflibert_scr_uncased` is an English model originally trained by ipadmanaban. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_uncased_en_5.1.1_3.0_1694508845965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_2ktweets_conflibert_scr_uncased_en_5.1.1_3.0_1694508845965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("masked_2ktweets_conflibert_scr_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("masked_2ktweets_conflibert_scr_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_2ktweets_conflibert_scr_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.0 MB| + +## References + +https://huggingface.co/ipadmanaban/Masked-2KTweets-ConfliBERT-scr-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-matscibert_en.md b/docs/_posts/ahmedlone127/2023-09-12-matscibert_en.md new file mode 100644 index 00000000000000..200e1f51c320b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-matscibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English matscibert BertEmbeddings from m3rg-iitd +author: John Snow Labs +name: matscibert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `matscibert` is an English model originally trained by m3rg-iitd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/matscibert_en_5.1.1_3.0_1694550983161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/matscibert_en_5.1.1_3.0_1694550983161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("matscibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("matscibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|matscibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/m3rg-iitd/matscibert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_arabic_c19_ar.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_arabic_c19_ar.md new file mode 100644 index 00000000000000..424a213070f24f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_arabic_c19_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic mbert_arabic_c19 BertEmbeddings from moha +author: John Snow Labs +name: mbert_arabic_c19 +date: 2023-09-12 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_arabic_c19` is an Arabic model originally trained by moha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_arabic_c19_ar_5.1.1_3.0_1694554545798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_arabic_c19_ar_5.1.1_3.0_1694554545798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_arabic_c19","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_arabic_c19", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_arabic_c19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|624.7 MB| + +## References + +https://huggingface.co/moha/mbert_ar_c19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_chinese_en.md new file mode 100644 index 00000000000000..d26011b5a91d9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_resp_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_resp_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_resp_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_resp_english_chinese_en_5.1.1_3.0_1694558912316.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_resp_english_chinese_en_5.1.1_3.0_1694558912316.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_resp_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_resp_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_resp_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.0 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_german_en.md new file mode 100644 index 00000000000000..4e6a2f0393f96e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_resp_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_resp_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_resp_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_resp_english_german_en_5.1.1_3.0_1694558476877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_resp_english_german_en_5.1.1_3.0_1694558476877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_resp_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_resp_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_resp_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.2 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_italian_en.md new file mode 100644 index 00000000000000..3152cc6cee6769 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_resp_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_resp_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_resp_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_resp_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_resp_english_italian_en_5.1.1_3.0_1694558696793.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_resp_english_italian_en_5.1.1_3.0_1694558696793.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_resp_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_resp_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_resp_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.1 MB| + +## References + +https://huggingface.co/nikitam/mbert-resp-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_swedish_distilled_cased_sv.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_swedish_distilled_cased_sv.md new file mode 100644 index 00000000000000..463773b8ada81e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_swedish_distilled_cased_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish mbert_swedish_distilled_cased BertEmbeddings from Addedk +author: John Snow Labs +name: mbert_swedish_distilled_cased +date: 2023-09-12 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_swedish_distilled_cased` is a Swedish model originally trained by Addedk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_swedish_distilled_cased_sv_5.1.1_3.0_1694549731872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_swedish_distilled_cased_sv_5.1.1_3.0_1694549731872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_swedish_distilled_cased","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_swedish_distilled_cased", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_swedish_distilled_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|505.7 MB| + +## References + +https://huggingface.co/Addedk/mbert-swedish-distilled-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_chinese_en.md new file mode 100644 index 00000000000000..9f9a6aaca92bb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_chat_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_chat_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_chat_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_chinese_en_5.1.1_3.0_1694559504129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_chinese_en_5.1.1_3.0_1694559504129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_chat_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_chat_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_chat_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.0 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-chat-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_german_en.md new file mode 100644 index 00000000000000..8e156a6e394c00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_chat_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_chat_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_chat_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_german_en_5.1.1_3.0_1694559103177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_german_en_5.1.1_3.0_1694559103177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_chat_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_chat_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_chat_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.2 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-chat-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_italian_en.md new file mode 100644 index 00000000000000..5525b45002cd0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_chat_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_chat_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_chat_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_chat_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_italian_en_5.1.1_3.0_1694559329950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_chat_english_italian_en_5.1.1_3.0_1694559329950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_chat_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_chat_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_chat_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.1 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-chat-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_chinese_en.md new file mode 100644 index 00000000000000..c7802343bfcfa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_sent_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_sent_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_sent_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_chinese_en_5.1.1_3.0_1694560100127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_chinese_en_5.1.1_3.0_1694560100127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_sent_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_sent_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_sent_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|622.3 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-sent-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_german_en.md new file mode 100644 index 00000000000000..ae91b6ff3b9dc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_sent_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_sent_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_sent_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_german_en_5.1.1_3.0_1694559726822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_german_en_5.1.1_3.0_1694559726822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_sent_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_sent_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_sent_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|624.8 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-sent-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_italian_en.md new file mode 100644 index 00000000000000..67e15d0000953f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_tlm_sent_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_tlm_sent_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_tlm_sent_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_tlm_sent_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_italian_en_5.1.1_3.0_1694559911755.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_tlm_sent_english_italian_en_5.1.1_3.0_1694559911755.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_tlm_sent_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_tlm_sent_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_tlm_sent_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|622.4 MB| + +## References + +https://huggingface.co/nikitam/mbert-tlm-sent-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_chinese_en.md new file mode 100644 index 00000000000000..07ec58f81d5be0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_xdm_english_chinese BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_xdm_english_chinese +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_xdm_english_chinese` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_chinese_en_5.1.1_3.0_1694560769570.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_chinese_en_5.1.1_3.0_1694560769570.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_xdm_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_xdm_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_xdm_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|624.6 MB| + +## References + +https://huggingface.co/nikitam/mbert-xdm-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_german_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_german_en.md new file mode 100644 index 00000000000000..56975392b75a88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_german_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_xdm_english_german BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_xdm_english_german +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_xdm_english_german` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_german_en_5.1.1_3.0_1694560316540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_german_en_5.1.1_3.0_1694560316540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_xdm_english_german","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_xdm_english_german", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_xdm_english_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.4 MB| + +## References + +https://huggingface.co/nikitam/mbert-xdm-en-de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_italian_en.md b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_italian_en.md new file mode 100644 index 00000000000000..ee756f126a4d0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mbert_xdm_english_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_xdm_english_italian BertEmbeddings from nikitam +author: John Snow Labs +name: mbert_xdm_english_italian +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_xdm_english_italian` is an English model originally trained by nikitam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_italian_en_5.1.1_3.0_1694560553745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_xdm_english_italian_en_5.1.1_3.0_1694560553745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mbert_xdm_english_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_xdm_english_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_xdm_english_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.4 MB| + +## References + +https://huggingface.co/nikitam/mbert-xdm-en-it \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-miem_scibert_linguistic_en.md b/docs/_posts/ahmedlone127/2023-09-12-miem_scibert_linguistic_en.md new file mode 100644 index 00000000000000..cef5decb871541 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-miem_scibert_linguistic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English miem_scibert_linguistic BertEmbeddings from miemBertProject +author: John Snow Labs +name: miem_scibert_linguistic +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `miem_scibert_linguistic` is an English model originally trained by miemBertProject. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/miem_scibert_linguistic_en_5.1.1_3.0_1694549092975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/miem_scibert_linguistic_en_5.1.1_3.0_1694549092975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("miem_scibert_linguistic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("miem_scibert_linguistic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|miem_scibert_linguistic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|657.4 MB| + +## References + +https://huggingface.co/miemBertProject/miem-scibert-linguistic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilm_finetuned_imdb_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilm_finetuned_imdb_accelerate_en.md new file mode 100644 index 00000000000000..7fe241693778a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilm_finetuned_imdb_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_finetuned_imdb_accelerate BertEmbeddings from lewtun +author: John Snow Labs +name: minilm_finetuned_imdb_accelerate +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_finetuned_imdb_accelerate` is an English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_finetuned_imdb_accelerate_en_5.1.1_3.0_1694548276227.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_finetuned_imdb_accelerate_en_5.1.1_3.0_1694548276227.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilm_finetuned_imdb_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilm_finetuned_imdb_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_finetuned_imdb_accelerate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|124.5 MB| + +## References + +https://huggingface.co/lewtun/minilm-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilm_l12_h384_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilm_l12_h384_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..e339d027c3b24c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilm_l12_h384_uncased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_l12_h384_uncased_finetuned_imdb BertEmbeddings from lewtun +author: John Snow Labs +name: minilm_l12_h384_uncased_finetuned_imdb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_l12_h384_uncased_finetuned_imdb` is an English model originally trained by lewtun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_l12_h384_uncased_finetuned_imdb_en_5.1.1_3.0_1694548053126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_l12_h384_uncased_finetuned_imdb_en_5.1.1_3.0_1694548053126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilm_l12_h384_uncased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilm_l12_h384_uncased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_l12_h384_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|124.5 MB| + +## References + +https://huggingface.co/lewtun/MiniLM-L12-H384-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilm_l_12_stackoverflow_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilm_l_12_stackoverflow_en.md new file mode 100644 index 00000000000000..14aec5f9c15ffb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilm_l_12_stackoverflow_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_l_12_stackoverflow BertEmbeddings from M-Chimiste +author: John Snow Labs +name: minilm_l_12_stackoverflow +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_l_12_stackoverflow` is an English model originally trained by M-Chimiste. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_l_12_stackoverflow_en_5.1.1_3.0_1694552531487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_l_12_stackoverflow_en_5.1.1_3.0_1694552531487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilm_l_12_stackoverflow","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilm_l_12_stackoverflow", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_l_12_stackoverflow| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|124.7 MB| + +## References + +https://huggingface.co/M-Chimiste/MiniLM-L-12-StackOverflow \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_base_en.md new file mode 100644 index 00000000000000..48068504c90f0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h384_distilled_from_bert_base BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h384_distilled_from_bert_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h384_distilled_from_bert_base` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_base_en_5.1.1_3.0_1694562156137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_base_en_5.1.1_3.0_1694562156137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h384_distilled_from_bert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h384_distilled_from_bert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h384_distilled_from_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|54.1 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H384-distilled-from-BERT-Base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_large_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_large_en.md new file mode 100644 index 00000000000000..d7dbada6e7d0b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h384_distilled_from_bert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h384_distilled_from_bert_large BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h384_distilled_from_bert_large +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h384_distilled_from_bert_large` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_large_en_5.1.1_3.0_1694562262970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h384_distilled_from_bert_large_en_5.1.1_3.0_1694562262970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h384_distilled_from_bert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h384_distilled_from_bert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h384_distilled_from_bert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|54.2 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H384-distilled-from-BERT-Large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_base_en.md new file mode 100644 index 00000000000000..47347c85f09f3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h768_distilled_from_bert_base BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h768_distilled_from_bert_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h768_distilled_from_bert_base` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h768_distilled_from_bert_base_en_5.1.1_3.0_1694562396390.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h768_distilled_from_bert_base_en_5.1.1_3.0_1694562396390.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h768_distilled_from_bert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h768_distilled_from_bert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h768_distilled_from_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|158.7 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H768-distilled-from-BERT-Base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_large_en.md b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_large_en.md new file mode 100644 index 00000000000000..f95a02a6a5034f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-minilmv2_l6_h768_distilled_from_bert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilmv2_l6_h768_distilled_from_bert_large BertEmbeddings from nreimers +author: John Snow Labs +name: minilmv2_l6_h768_distilled_from_bert_large +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilmv2_l6_h768_distilled_from_bert_large` is an English model originally trained by nreimers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h768_distilled_from_bert_large_en_5.1.1_3.0_1694562531906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilmv2_l6_h768_distilled_from_bert_large_en_5.1.1_3.0_1694562531906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("minilmv2_l6_h768_distilled_from_bert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("minilmv2_l6_h768_distilled_from_bert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilmv2_l6_h768_distilled_from_bert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|158.8 MB| + +## References + +https://huggingface.co/nreimers/MiniLMv2-L6-H768-distilled-from-BERT-Large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mizbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-mizbert_en.md new file mode 100644 index 00000000000000..6892e5120e8096 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mizbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mizbert BertEmbeddings from robzchhangte +author: John Snow Labs +name: mizbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mizbert` is an English model originally trained by robzchhangte. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mizbert_en_5.1.1_3.0_1694556095325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mizbert_en_5.1.1_3.0_1694556095325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("mizbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("mizbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mizbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/robzchhangte/MizBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p1_combined_001_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p1_combined_001_en.md new file mode 100644 index 00000000000000..7696cc278c79aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p1_combined_001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230503_indobert_base_p1_combined_001 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230503_indobert_base_p1_combined_001 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230503_indobert_base_p1_combined_001` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_base_p1_combined_001_en_5.1.1_3.0_1694551387527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_base_p1_combined_001_en_5.1.1_3.0_1694551387527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("mlm_20230503_indobert_base_p1_combined_001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("mlm_20230503_indobert_base_p1_combined_001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230503_indobert_base_p1_combined_001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.3 MB| + +## References + +https://huggingface.co/intanm/mlm-20230503-indobert-base-p1-combined-001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p2_002_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p2_002_en.md new file mode 100644 index 00000000000000..2076a704a9435c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_base_p2_002_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230503_indobert_base_p2_002 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230503_indobert_base_p2_002 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230503_indobert_base_p2_002` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_base_p2_002_en_5.1.1_3.0_1694550333162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_base_p2_002_en_5.1.1_3.0_1694550333162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("mlm_20230503_indobert_base_p2_002","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("mlm_20230503_indobert_base_p2_002", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230503_indobert_base_p2_002| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230503-indobert-base-p2-002 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_large_p1_001_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_large_p1_001_en.md new file mode 100644 index 00000000000000..b805b7b8824326 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230503_indobert_large_p1_001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230503_indobert_large_p1_001 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230503_indobert_large_p1_001 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230503_indobert_large_p1_001` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_large_p1_001_en_5.1.1_3.0_1694550821233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230503_indobert_large_p1_001_en_5.1.1_3.0_1694550821233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("mlm_20230503_indobert_large_p1_001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("mlm_20230503_indobert_large_p1_001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230503_indobert_large_p1_001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230503-indobert-large-p1-001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230510_indobert_large_p1_001_pt2_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230510_indobert_large_p1_001_pt2_en.md new file mode 100644 index 00000000000000..18a543b38a09f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230510_indobert_large_p1_001_pt2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230510_indobert_large_p1_001_pt2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230510_indobert_large_p1_001_pt2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230510_indobert_large_p1_001_pt2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230510_indobert_large_p1_001_pt2_en_5.1.1_3.0_1694557161327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230510_indobert_large_p1_001_pt2_en_5.1.1_3.0_1694557161327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Read the "text" column and write the "documents" column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Produce the "token" column that BertEmbeddings lists as an input. +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") +# Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +embeddings = BertEmbeddings.pretrained("mlm_20230510_indobert_large_p1_001_pt2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Read the "text" column and write the "documents" column. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Produce the "token" column that BertEmbeddings lists as an input. +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") +// Load the pretrained model; `data` is assumed to be a DataFrame with a "text" column. +val embeddings = BertEmbeddings + .pretrained("mlm_20230510_indobert_large_p1_001_pt2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230510_indobert_large_p1_001_pt2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230510-indobert-large-p1-001-pt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230511_indobert_large_p1_combined_pt1_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230511_indobert_large_p1_combined_pt1_en.md new file mode 100644 index 00000000000000..9dbe37041d55f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230511_indobert_large_p1_combined_pt1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230511_indobert_large_p1_combined_pt1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230511_indobert_large_p1_combined_pt1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230511_indobert_large_p1_combined_pt1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230511_indobert_large_p1_combined_pt1_en_5.1.1_3.0_1694557729072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230511_indobert_large_p1_combined_pt1_en_5.1.1_3.0_1694557729072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230511_indobert_large_p1_combined_pt1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230511_indobert_large_p1_combined_pt1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230511_indobert_large_p1_combined_pt1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230511-indobert-large-p1-combined-pt1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_002_pt1_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_002_pt1_en.md new file mode 100644 index 00000000000000..8f532d96fde757 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_002_pt1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230513_indobert_large_p1_002_pt1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230513_indobert_large_p1_002_pt1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230513_indobert_large_p1_002_pt1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_002_pt1_en_5.1.1_3.0_1694560785628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_002_pt1_en_5.1.1_3.0_1694560785628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230513_indobert_large_p1_002_pt1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230513_indobert_large_p1_002_pt1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230513_indobert_large_p1_002_pt1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230513-indobert-large-p1-002-pt1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_002_pt2_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_002_pt2_en.md new file mode 100644 index 00000000000000..595e95f4a87ad0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_002_pt2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230513_indobert_large_p1_002_pt2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230513_indobert_large_p1_002_pt2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230513_indobert_large_p1_002_pt2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_002_pt2_en_5.1.1_3.0_1694561058491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_002_pt2_en_5.1.1_3.0_1694561058491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230513_indobert_large_p1_002_pt2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230513_indobert_large_p1_002_pt2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230513_indobert_large_p1_002_pt2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230513-indobert-large-p1-002-pt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_combined_pt2_en.md b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_combined_pt2_en.md new file mode 100644 index 00000000000000..831aff4aaf1d0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mlm_20230513_indobert_large_p1_combined_pt2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230513_indobert_large_p1_combined_pt2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230513_indobert_large_p1_combined_pt2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230513_indobert_large_p1_combined_pt2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_combined_pt2_en_5.1.1_3.0_1694560283117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230513_indobert_large_p1_combined_pt2_en_5.1.1_3.0_1694560283117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230513_indobert_large_p1_combined_pt2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230513_indobert_large_p1_combined_pt2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230513_indobert_large_p1_combined_pt2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intanm/mlm-20230513-indobert-large-p1-combined-pt2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-model202109_en.md b/docs/_posts/ahmedlone127/2023-09-12-model202109_en.md new file mode 100644 index 00000000000000..4cf88950d8266d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-model202109_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model202109 BertEmbeddings from lyx10290516 +author: John Snow Labs +name: model202109 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model202109` is an English model originally trained by lyx10290516. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model202109_en_5.1.1_3.0_1694550495262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model202109_en_5.1.1_3.0_1694550495262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model202109","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model202109", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model202109| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lyx10290516/model202109 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-muril_adapted_local_xx.md b/docs/_posts/ahmedlone127/2023-09-12-muril_adapted_local_xx.md new file mode 100644 index 00000000000000..dab1e578462ca2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-muril_adapted_local_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual muril_adapted_local BertEmbeddings from monsoon-nlp +author: John Snow Labs +name: muril_adapted_local +date: 2023-09-12 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `muril_adapted_local` is a Multilingual model originally trained by monsoon-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/muril_adapted_local_xx_5.1.1_3.0_1694555822467.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/muril_adapted_local_xx_5.1.1_3.0_1694555822467.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("muril_adapted_local","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("muril_adapted_local", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|muril_adapted_local| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|885.8 MB| + +## References + +https://huggingface.co/monsoon-nlp/muril-adapted-local \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mybert_en.md b/docs/_posts/ahmedlone127/2023-09-12-mybert_en.md new file mode 100644 index 00000000000000..93e3eacbe46f8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mybert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mybert BertEmbeddings from RavenK +author: John Snow Labs +name: mybert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mybert` is an English model originally trained by RavenK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mybert_en_5.1.1_3.0_1694548245920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mybert_en_5.1.1_3.0_1694548245920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mybert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mybert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mybert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/RavenK/mybert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-mymodel007_lbh020300_en.md b/docs/_posts/ahmedlone127/2023-09-12-mymodel007_lbh020300_en.md new file mode 100644 index 00000000000000..633fab71a48ce9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-mymodel007_lbh020300_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel007_lbh020300 BertEmbeddings from lbh020300 +author: John Snow Labs +name: mymodel007_lbh020300 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mymodel007_lbh020300` is an English model originally trained by lbh020300. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel007_lbh020300_en_5.1.1_3.0_1694547914311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel007_lbh020300_en_5.1.1_3.0_1694547914311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel007_lbh020300","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel007_lbh020300", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel007_lbh020300| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/lbh020300/mymodel007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-myrubert_tiny2_en.md b/docs/_posts/ahmedlone127/2023-09-12-myrubert_tiny2_en.md new file mode 100644 index 00000000000000..a4f1b599f7653c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-myrubert_tiny2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English myrubert_tiny2 BertEmbeddings from nlp-testing +author: John Snow Labs +name: myrubert_tiny2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `myrubert_tiny2` is an English model originally trained by nlp-testing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/myrubert_tiny2_en_5.1.1_3.0_1694555345139.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/myrubert_tiny2_en_5.1.1_3.0_1694555345139.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("myrubert_tiny2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("myrubert_tiny2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|myrubert_tiny2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|109.0 MB| + +## References + +https://huggingface.co/nlp-testing/myrubert-tiny2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-nepal_bhasa_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-nepal_bhasa_bert_en.md new file mode 100644 index 00000000000000..3595373bc137f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-nepal_bhasa_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nepal_bhasa_bert BertEmbeddings from onlydj96 +author: John Snow Labs +name: nepal_bhasa_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nepal_bhasa_bert` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_en_5.1.1_3.0_1694549877596.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_bert_en_5.1.1_3.0_1694549877596.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nepal_bhasa_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepal_bhasa_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.1 MB| + +## References + +https://huggingface.co/onlydj96/new_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-nontoxiccivilbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-nontoxiccivilbert_en.md new file mode 100644 index 00000000000000..1b6223c22ac5e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-nontoxiccivilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nontoxiccivilbert BertEmbeddings from Ashokajou51 +author: John Snow Labs +name: nontoxiccivilbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nontoxiccivilbert` is an English model originally trained by Ashokajou51. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nontoxiccivilbert_en_5.1.1_3.0_1694558265623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nontoxiccivilbert_en_5.1.1_3.0_1694558265623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nontoxiccivilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nontoxiccivilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nontoxiccivilbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Ashokajou51/NonToxicCivilBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-norbert2_no.md b/docs/_posts/ahmedlone127/2023-09-12-norbert2_no.md new file mode 100644 index 00000000000000..213b1d443ca767 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-norbert2_no.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Norwegian norbert2 BertEmbeddings from ltg +author: John Snow Labs +name: norbert2 +date: 2023-09-12 +tags: [bert, "no", open_source, fill_mask, onnx] +task: Embeddings +language: "no" +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert2` is a Norwegian model originally trained by ltg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert2_no_5.1.1_3.0_1694549723258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert2_no_5.1.1_3.0_1694549723258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("norbert2","no") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("norbert2", "no") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|465.2 MB| + +## References + +https://huggingface.co/ltg/norbert2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-norbert_no.md b/docs/_posts/ahmedlone127/2023-09-12-norbert_no.md new file mode 100644 index 00000000000000..a4df72e7efb168 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-norbert_no.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Norwegian norbert BertEmbeddings from ltg +author: John Snow Labs +name: norbert +date: 2023-09-12 +tags: [bert, "no", open_source, fill_mask, onnx] +task: Embeddings +language: "no" +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `norbert` is a Norwegian model originally trained by ltg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/norbert_no_5.1.1_3.0_1694549544686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/norbert_no_5.1.1_3.0_1694549544686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("norbert","no") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("norbert", "no") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|norbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|415.1 MB| + +## References + +https://huggingface.co/ltg/norbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_bert_en.md new file mode 100644 index 00000000000000..3232ea9752d74a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English original_topic_sports_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: original_topic_sports_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `original_topic_sports_bert` is an English model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/original_topic_sports_bert_en_5.1.1_3.0_1694560482579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/original_topic_sports_bert_en_5.1.1_3.0_1694560482579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("original_topic_sports_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("original_topic_sports_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|original_topic_sports_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/Kdogs/original_topic-sports_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_kcbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_kcbert_en.md new file mode 100644 index 00000000000000..18a9ab6eb73789 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-original_topic_sports_kcbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English original_topic_sports_kcbert BertEmbeddings from Kdogs +author: John Snow Labs +name: original_topic_sports_kcbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `original_topic_sports_kcbert` is an English model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/original_topic_sports_kcbert_en_5.1.1_3.0_1694561238255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/original_topic_sports_kcbert_en_5.1.1_3.0_1694561238255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("original_topic_sports_kcbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("original_topic_sports_kcbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|original_topic_sports_kcbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/Kdogs/original_topic-sports_kcbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-pharmbert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_cased_en.md new file mode 100644 index 00000000000000..ed24d3598bf5d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pharmbert_cased BertEmbeddings from Lianglab +author: John Snow Labs +name: pharmbert_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pharmbert_cased` is an English model originally trained by Lianglab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharmbert_cased_en_5.1.1_3.0_1694557329356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharmbert_cased_en_5.1.1_3.0_1694557329356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pharmbert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pharmbert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharmbert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/Lianglab/PharmBERT-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-pharmbert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_uncased_en.md new file mode 100644 index 00000000000000..5f2cb33bf1ba7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-pharmbert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pharmbert_uncased BertEmbeddings from Lianglab +author: John Snow Labs +name: pharmbert_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pharmbert_uncased` is an English model originally trained by Lianglab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pharmbert_uncased_en_5.1.1_3.0_1694557457237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pharmbert_uncased_en_5.1.1_3.0_1694557457237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pharmbert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pharmbert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pharmbert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Lianglab/PharmBERT-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-politbert_en.md b/docs/_posts/ahmedlone127/2023-09-12-politbert_en.md new file mode 100644 index 00000000000000..195e729b9ccd71 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-politbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English politbert BertEmbeddings from maurice +author: John Snow Labs +name: politbert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `politbert` is an English model originally trained by maurice. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/politbert_en_5.1.1_3.0_1694552201401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/politbert_en_5.1.1_3.0_1694552201401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("politbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("politbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|politbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/maurice/PolitBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-politibeto_es.md b/docs/_posts/ahmedlone127/2023-09-12-politibeto_es.md new file mode 100644 index 00000000000000..bc5c7353660b4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-politibeto_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish politibeto BertEmbeddings from nlp-cimat +author: John Snow Labs +name: politibeto +date: 2023-09-12 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `politibeto` is a Castilian, Spanish model originally trained by nlp-cimat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/politibeto_es_5.1.1_3.0_1694552289989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/politibeto_es_5.1.1_3.0_1694552289989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("politibeto","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("politibeto", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|politibeto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/nlp-cimat/politibeto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-practice00_en.md b/docs/_posts/ahmedlone127/2023-09-12-practice00_en.md new file mode 100644 index 00000000000000..f9d0a6824d6643 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-practice00_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English practice00 BertEmbeddings from maroo93 +author: John Snow Labs +name: practice00 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `practice00` is an English model originally trained by maroo93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practice00_en_5.1.1_3.0_1694551890248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practice00_en_5.1.1_3.0_1694551890248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("practice00","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("practice00", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practice00| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/maroo93/practice00 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-practice01_en.md b/docs/_posts/ahmedlone127/2023-09-12-practice01_en.md new file mode 100644 index 00000000000000..5f9e32a79436c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-practice01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English practice01 BertEmbeddings from maroo93 +author: John Snow Labs +name: practice01 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `practice01` is an English model originally trained by maroo93. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/practice01_en_5.1.1_3.0_1694552050711.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/practice01_en_5.1.1_3.0_1694552050711.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("practice01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("practice01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|practice01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/maroo93/practice01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-prop_marco_en.md b/docs/_posts/ahmedlone127/2023-09-12-prop_marco_en.md new file mode 100644 index 00000000000000..66090ef8f1d566 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-prop_marco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English prop_marco BertEmbeddings from xyma +author: John Snow Labs +name: prop_marco +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prop_marco` is an English model originally trained by xyma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prop_marco_en_5.1.1_3.0_1694559633981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prop_marco_en_5.1.1_3.0_1694559633981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("prop_marco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("prop_marco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prop_marco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/xyma/PROP-marco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-prop_wiki_en.md b/docs/_posts/ahmedlone127/2023-09-12-prop_wiki_en.md new file mode 100644 index 00000000000000..aeb8ae5ad638cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-prop_wiki_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English prop_wiki BertEmbeddings from xyma +author: John Snow Labs +name: prop_wiki +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prop_wiki` is an English model originally trained by xyma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prop_wiki_en_5.1.1_3.0_1694559423925.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prop_wiki_en_5.1.1_3.0_1694559423925.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("prop_wiki","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("prop_wiki", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prop_wiki| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/xyma/PROP-wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-psych_search_en.md b/docs/_posts/ahmedlone127/2023-09-12-psych_search_en.md new file mode 100644 index 00000000000000..48d5a48e6daeaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-psych_search_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English psych_search BertEmbeddings from nlp4good +author: John Snow Labs +name: psych_search +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `psych_search` is an English model originally trained by nlp4good. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/psych_search_en_5.1.1_3.0_1694560934620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/psych_search_en_5.1.1_3.0_1694560934620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("psych_search","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("psych_search", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|psych_search| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/nlp4good/psych-search \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-push_hub_test_en.md b/docs/_posts/ahmedlone127/2023-09-12-push_hub_test_en.md new file mode 100644 index 00000000000000..b95fccd27651ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-push_hub_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English push_hub_test BertEmbeddings from ksmcg +author: John Snow Labs +name: push_hub_test +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `push_hub_test` is an English model originally trained by ksmcg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/push_hub_test_en_5.1.1_3.0_1694510535660.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/push_hub_test_en_5.1.1_3.0_1694510535660.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("push_hub_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("push_hub_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|push_hub_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ksmcg/push_hub_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-quranexe_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-quranexe_bert_en.md new file mode 100644 index 00000000000000..4bbdd21ce194d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-quranexe_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English quranexe_bert BertEmbeddings from mustapha +author: John Snow Labs +name: quranexe_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `quranexe_bert` is an English model originally trained by mustapha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quranexe_bert_en_5.1.1_3.0_1694558398605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quranexe_bert_en_5.1.1_3.0_1694558398605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("quranexe_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("quranexe_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quranexe_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.6 MB| + +## References + +https://huggingface.co/mustapha/QuranExe-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_clean_steps_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_clean_steps_en.md new file mode 100644 index 00000000000000..417c168c798e0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_clean_steps_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_clean_steps BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_clean_steps +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_clean_steps` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_clean_steps_en_5.1.1_3.0_1694553199740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_clean_steps_en_5.1.1_3.0_1694553199740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("recipe_clean_steps","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_clean_steps", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_clean_steps| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/paola-md/recipe-clean_steps \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_icelandic_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_icelandic_en.md new file mode 100644 index 00000000000000..1cb03fb36da554 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_icelandic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_icelandic BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_icelandic +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_icelandic` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_icelandic_en_5.1.1_3.0_1694552871118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_icelandic_en_5.1.1_3.0_1694552871118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("recipe_icelandic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_icelandic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_icelandic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/paola-md/recipe-is \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_tis_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_tis_en.md new file mode 100644 index 00000000000000..729cacd4bd5546 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_tis_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_tis BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_tis +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_tis` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_tis_en_5.1.1_3.0_1694552674837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_tis_en_5.1.1_3.0_1694552674837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("recipe_tis","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_tis", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_tis| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.5 MB| + +## References + +https://huggingface.co/paola-md/recipe-tis \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-recipe_tsonga_en.md b/docs/_posts/ahmedlone127/2023-09-12-recipe_tsonga_en.md new file mode 100644 index 00000000000000..98ebf54472d533 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-recipe_tsonga_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_tsonga BertEmbeddings from paola-md +author: John Snow Labs +name: recipe_tsonga +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_tsonga` is an English model originally trained by paola-md. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_tsonga_en_5.1.1_3.0_1694553036897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_tsonga_en_5.1.1_3.0_1694553036897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("recipe_tsonga","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("recipe_tsonga", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_tsonga| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/paola-md/recipe-ts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-regex_gb_2021_en.md b/docs/_posts/ahmedlone127/2023-09-12-regex_gb_2021_en.md new file mode 100644 index 00000000000000..f2188a7025182c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-regex_gb_2021_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English regex_gb_2021 BertEmbeddings from mossaic-candle +author: John Snow Labs +name: regex_gb_2021 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `regex_gb_2021` is an English model originally trained by mossaic-candle. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/regex_gb_2021_en_5.1.1_3.0_1694508052972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/regex_gb_2021_en_5.1.1_3.0_1694508052972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("regex_gb_2021","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("regex_gb_2021", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|regex_gb_2021| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.7 MB| + +## References + +https://huggingface.co/mossaic-candle/regex-gb-2021 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-relu_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-12-relu_bert_base_uncased_en.md new file mode 100644 index 00000000000000..6d7f6bb1a6785b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-relu_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English relu_bert_base_uncased BertEmbeddings from mpiorczynski +author: John Snow Labs +name: relu_bert_base_uncased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `relu_bert_base_uncased` is an English model originally trained by mpiorczynski. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/relu_bert_base_uncased_en_5.1.1_3.0_1694552781766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/relu_bert_base_uncased_en_5.1.1_3.0_1694552781766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("relu_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("relu_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|relu_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mpiorczynski/relu-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-review_en.md b/docs/_posts/ahmedlone127/2023-09-12-review_en.md new file mode 100644 index 00000000000000..197f37101b5c9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-review_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English review BertEmbeddings from Hikam22 +author: John Snow Labs +name: review +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `review` is an English model originally trained by Hikam22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/review_en_5.1.1_3.0_1694548928369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/review_en_5.1.1_3.0_1694548928369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("review","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("review", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|review| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|304.8 MB| + +## References + +https://huggingface.co/Hikam22/Review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-romanian_bert_tweet_large_ro.md b/docs/_posts/ahmedlone127/2023-09-12-romanian_bert_tweet_large_ro.md new file mode 100644 index 00000000000000..81cd4e646f5662 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-romanian_bert_tweet_large_ro.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Moldavian, Moldovan, Romanian romanian_bert_tweet_large BertEmbeddings from Iulian277 +author: John Snow Labs +name: romanian_bert_tweet_large +date: 2023-09-12 +tags: [bert, ro, open_source, fill_mask, onnx] +task: Embeddings +language: ro +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `romanian_bert_tweet_large` is a Moldavian, Moldovan, Romanian model originally trained by Iulian277. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/romanian_bert_tweet_large_ro_5.1.1_3.0_1694557360243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/romanian_bert_tweet_large_ro_5.1.1_3.0_1694557360243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("romanian_bert_tweet_large","ro") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("romanian_bert_tweet_large", "ro") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|romanian_bert_tweet_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ro| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Iulian277/ro-bert-tweet-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sample_model_en.md b/docs/_posts/ahmedlone127/2023-09-12-sample_model_en.md new file mode 100644 index 00000000000000..912b4592909e62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sample_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sample_model BertEmbeddings from paopao0226 +author: John Snow Labs +name: sample_model +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sample_model` is an English model originally trained by paopao0226. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sample_model_en_5.1.1_3.0_1694509238918.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sample_model_en_5.1.1_3.0_1694509238918.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sample_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sample_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sample_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/paopao0226/sample-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-satd_identify_en.md b/docs/_posts/ahmedlone127/2023-09-12-satd_identify_en.md new file mode 100644 index 00000000000000..e993ed0a4e0b9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-satd_identify_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English satd_identify BertEmbeddings from aavvvv +author: John Snow Labs +name: satd_identify +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `satd_identify` is an English model originally trained by aavvvv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/satd_identify_en_5.1.1_3.0_1694557773164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/satd_identify_en_5.1.1_3.0_1694557773164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("satd_identify","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("satd_identify", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|satd_identify| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/aavvvv/satd-identify \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-scibert_lm_const_finetuned_20_en.md b/docs/_posts/ahmedlone127/2023-09-12-scibert_lm_const_finetuned_20_en.md new file mode 100644 index 00000000000000..95bdfad4001cd1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-scibert_lm_const_finetuned_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_lm_const_finetuned_20 BertEmbeddings from ariesutiono +author: John Snow Labs +name: scibert_lm_const_finetuned_20 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_lm_const_finetuned_20` is an English model originally trained by ariesutiono. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_lm_const_finetuned_20_en_5.1.1_3.0_1694559809758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_lm_const_finetuned_20_en_5.1.1_3.0_1694559809758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_lm_const_finetuned_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_lm_const_finetuned_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_lm_const_finetuned_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/ariesutiono/scibert-lm-const-finetuned-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_finetuned_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_finetuned_cord19_en.md new file mode 100644 index 00000000000000..07418accd72d53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_finetuned_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_finetuned_cord19 BertEmbeddings from mrm8488 +author: John Snow Labs +name: scibert_scivocab_finetuned_cord19 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_finetuned_cord19` is an English model originally trained by mrm8488. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_finetuned_cord19_en_5.1.1_3.0_1694556414909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_finetuned_cord19_en_5.1.1_3.0_1694556414909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_finetuned_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_finetuned_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_finetuned_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/mrm8488/scibert_scivocab-finetuned-CORD19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_uncased_ft_mlm_sdu21_ai_en.md b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_uncased_ft_mlm_sdu21_ai_en.md new file mode 100644 index 00000000000000..9565c044e1500c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-scibert_scivocab_uncased_ft_mlm_sdu21_ai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_ft_mlm_sdu21_ai BertEmbeddings from napsternxg +author: John Snow Labs +name: scibert_scivocab_uncased_ft_mlm_sdu21_ai +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_ft_mlm_sdu21_ai` is an English model originally trained by napsternxg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_ft_mlm_sdu21_ai_en_5.1.1_3.0_1694557218055.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_ft_mlm_sdu21_ai_en_5.1.1_3.0_1694557218055.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_ft_mlm_sdu21_ai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_ft_mlm_sdu21_ai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_ft_mlm_sdu21_ai| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/napsternxg/scibert_scivocab_uncased_ft_mlm_SDU21_AI \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sec_bert_base_en.md b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_base_en.md new file mode 100644 index 00000000000000..c9b1aca7e0f539 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sec_bert_base BertEmbeddings from nlpaueb +author: John Snow Labs +name: sec_bert_base +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sec_bert_base` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sec_bert_base_en_5.1.1_3.0_1694561781228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sec_bert_base_en_5.1.1_3.0_1694561781228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sec_bert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sec_bert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sec_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.5 MB| + +## References + +https://huggingface.co/nlpaueb/sec-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sec_bert_num_en.md b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_num_en.md new file mode 100644 index 00000000000000..469ff415dafe13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_num_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sec_bert_num BertEmbeddings from nlpaueb +author: John Snow Labs +name: sec_bert_num +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sec_bert_num` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sec_bert_num_en_5.1.1_3.0_1694561918110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sec_bert_num_en_5.1.1_3.0_1694561918110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sec_bert_num","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sec_bert_num", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sec_bert_num| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/nlpaueb/sec-bert-num \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sec_bert_shape_en.md b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_shape_en.md new file mode 100644 index 00000000000000..8a2d6149f908f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sec_bert_shape_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sec_bert_shape BertEmbeddings from nlpaueb +author: John Snow Labs +name: sec_bert_shape +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sec_bert_shape` is an English model originally trained by nlpaueb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sec_bert_shape_en_5.1.1_3.0_1694562038680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sec_bert_shape_en_5.1.1_3.0_1694562038680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sec_bert_shape","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sec_bert_shape", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sec_bert_shape| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/nlpaueb/sec-bert-shape \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_base_uncased_finetuned_with_haystack_en.md b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_base_uncased_finetuned_with_haystack_en.md new file mode 100644 index 00000000000000..6edab9d5cd7a1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_base_uncased_finetuned_with_haystack_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English set_date_1_bert_base_uncased_finetuned_with_haystack BertEmbeddings from motiondew +author: John Snow Labs +name: set_date_1_bert_base_uncased_finetuned_with_haystack +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `set_date_1_bert_base_uncased_finetuned_with_haystack` is an English model originally trained by motiondew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/set_date_1_bert_base_uncased_finetuned_with_haystack_en_5.1.1_3.0_1694556116394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/set_date_1_bert_base_uncased_finetuned_with_haystack_en_5.1.1_3.0_1694556116394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("set_date_1_bert_base_uncased_finetuned_with_haystack","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("set_date_1_bert_base_uncased_finetuned_with_haystack", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|set_date_1_bert_base_uncased_finetuned_with_haystack| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/motiondew/set_date_1_bert-base-uncased_finetuned_with_haystack \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_en.md new file mode 100644 index 00000000000000..bf626cb2fa66c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-set_date_1_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English set_date_1_bert BertEmbeddings from motiondew +author: John Snow Labs +name: set_date_1_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `set_date_1_bert` is an English model originally trained by motiondew. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/set_date_1_bert_en_5.1.1_3.0_1694555975958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/set_date_1_bert_en_5.1.1_3.0_1694555975958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("set_date_1_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("set_date_1_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|set_date_1_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/motiondew/set_date_1-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_custom_tokenizer_en.md new file mode 100644 index 00000000000000..b8ea3e2415a253 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_conll2003_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_conll2003_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_conll2003_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694549308806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694549308806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_conll2003_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_conll2003_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_conll2003_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-conll2003-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_en.md new file mode 100644 index 00000000000000..c76e180e686d07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_conll2003 BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_conll2003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_conll2003` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_en_5.1.1_3.0_1694549226176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_en_5.1.1_3.0_1694549226176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_conll2003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_conll2003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_conll2003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_from_scratch_en.md new file mode 100644 index 00000000000000..07fbaa12c1f3cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_conll2003_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_conll2003_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_conll2003_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_conll2003_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555844684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555844684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_conll2003_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_conll2003_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_conll2003_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-conll2003-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_en.md new file mode 100644 index 00000000000000..6d9d9821b8bb12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694509168848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694509168848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_cola_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.7 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..4174b6cd63d5ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561193785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561193785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_cola_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|112.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_en.md new file mode 100644 index 00000000000000..8f757685b7a828 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_en_5.1.1_3.0_1694508239344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_en_5.1.1_3.0_1694508239344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_cola","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..24e218dc859816 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561372145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561372145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|112.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-cola-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..c31269ced1c35b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694509261561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694509261561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..043b61fafdca4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561610937.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561610937.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|132.3 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_en.md new file mode 100644 index 00000000000000..2332ba65e00450 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_en_5.1.1_3.0_1694508322641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_en_5.1.1_3.0_1694508322641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..4e699bc0e5e97b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561907695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561907695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|132.3 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_en.md new file mode 100644 index 00000000000000..bd525709fd0d84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694509478690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694509478690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..38d99dfd891555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562480371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562480371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_en.md new file mode 100644 index 00000000000000..bbb332065ce655 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_en_5.1.1_3.0_1694508535779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_en_5.1.1_3.0_1694508535779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..52fbd586d3f702 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562732313.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562732313.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-mrpc-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..97470becbbd9c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694509564873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694509564873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..7eb6df9e905eec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563099212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563099212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|135.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_en.md new file mode 100644 index 00000000000000..71498ecaadd4d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_en_5.1.1_3.0_1694508618886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_en_5.1.1_3.0_1694508618886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..045f2965f76d2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694552340480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694552340480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_custom_tokenizer_en.md new file mode 100644 index 00000000000000..bd5955a1f6e764 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694509649898.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694509649898.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qqp_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_en.md new file mode 100644 index 00000000000000..6e4f20b08ad113 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_qqp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_en_5.1.1_3.0_1694508712348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_en_5.1.1_3.0_1694508712348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qqp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_custom_tokenizer_en.md new file mode 100644 index 00000000000000..4d21a83f168089 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694509735612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694509735612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_rte_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_en.md new file mode 100644 index 00000000000000..e259afbe76f735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_rte_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_en_5.1.1_3.0_1694508793653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_en_5.1.1_3.0_1694508793653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_rte","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_custom_tokenizer_en.md new file mode 100644 index 00000000000000..39f5f36b12d5e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694509835393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694509835393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_sst2_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_en.md new file mode 100644 index 00000000000000..0ccb73aaf94f88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_sst2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2 BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_en_5.1.1_3.0_1694508920238.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_en_5.1.1_3.0_1694508920238.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_sst2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_custom_tokenizer_en.md new file mode 100644 index 00000000000000..99565eded80454 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694509912489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694509912489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_stsb_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_en.md new file mode 100644 index 00000000000000..bfa92c4f11fa24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_en_5.1.1_3.0_1694509008527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_en_5.1.1_3.0_1694509008527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..5a24f818bbf09a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510024053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510024053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_wnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.7 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_en.md new file mode 100644 index 00000000000000..e7ad7199f2b60b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_glue_wnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_en_5.1.1_3.0_1694509092224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_en_5.1.1_3.0_1694509092224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_wnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_imdb_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_imdb_from_scratch_en.md new file mode 100644 index 00000000000000..435fc8604cbd35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_imdb_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_imdb_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_imdb_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_imdb_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556494575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556494575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_imdb_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_imdb_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_imdb_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-imdb-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_custom_tokenizer_en.md new file mode 100644 index 00000000000000..d6072ae617d93e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_rotten_tomatoes_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_rotten_tomatoes_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_rotten_tomatoes_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694549132801.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694549132801.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_rotten_tomatoes_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_rotten_tomatoes_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_rotten_tomatoes_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-rotten_tomatoes-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..e6d871875cbae0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_rotten_tomatoes BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_rotten_tomatoes +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_rotten_tomatoes` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_en_5.1.1_3.0_1694549024792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_en_5.1.1_3.0_1694549024792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-rotten_tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_from_scratch_en.md new file mode 100644 index 00000000000000..f02bcd6dc7d0e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_rotten_tomatoes_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_rotten_tomatoes_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_rotten_tomatoes_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_rotten_tomatoes_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694556082715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694556082715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_rotten_tomatoes_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_rotten_tomatoes_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_rotten_tomatoes_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-rotten_tomatoes-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..b7baa5d9ae69d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_snli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_snli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_snli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549568395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549568395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_snli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_snli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_snli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.7 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-snli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_en.md new file mode 100644 index 00000000000000..7184e8b0f81254 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_snli BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_snli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_snli` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_snli_en_5.1.1_3.0_1694549486115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_snli_en_5.1.1_3.0_1694549486115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_snli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_snli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_snli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-snli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_from_scratch_en.md new file mode 100644 index 00000000000000..af74e062bd8cd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_snli_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_snli_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_snli_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_snli_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_snli_from_scratch_en_5.1.1_3.0_1694556408270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_snli_from_scratch_en_5.1.1_3.0_1694556408270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_snli_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_snli_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_snli_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-snli-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_custom_tokenizer_en.md new file mode 100644 index 00000000000000..71c53adcb7862f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_squad_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_squad_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_squad_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547690830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547690830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_squad_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_squad_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_squad_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-squad-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_en.md new file mode 100644 index 00000000000000..88957ff7bd7aa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_squad BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_squad +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_squad` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_squad_en_5.1.1_3.0_1694547514364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_squad_en_5.1.1_3.0_1694547514364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_tweet_eval_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_tweet_eval_from_scratch_en.md new file mode 100644 index 00000000000000..ffba4d5978e137 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_tweet_eval_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_tweet_eval_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_tweet_eval_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_tweet_eval_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555281014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555281014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_tweet_eval_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_tweet_eval_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_tweet_eval_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-tweet_eval-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_custom_tokenizer_en.md new file mode 100644 index 00000000000000..a678a8c8a89aec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548914392.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548914392.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_en.md new file mode 100644 index 00000000000000..5addb769947b43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_en_5.1.1_3.0_1694548827535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_en_5.1.1_3.0_1694548827535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..7b18c3d6f73109 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554676967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554676967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_en.md new file mode 100644 index 00000000000000..3effc5c3c26392 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-small_mlm_wikitext_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_wikitext_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_wikitext_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_wikitext_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694556248548.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694556248548.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_wikitext_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_wikitext_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_wikitext_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-wikitext-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-sp_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-sp_bert_en.md new file mode 100644 index 00000000000000..63763e7dbc4f14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-sp_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sp_bert BertEmbeddings from tumd +author: John Snow Labs +name: sp_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sp_bert` is an English model originally trained by tumd. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sp_bert_en_5.1.1_3.0_1694549714006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sp_bert_en_5.1.1_3.0_1694549714006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("sp_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sp_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sp_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.7 MB| + +## References + +https://huggingface.co/tumd/sp-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e2_en.md new file mode 100644 index 00000000000000..ba92b1d931e349 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_bert_e2 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_bert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_bert_e2` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_bert_e2_en_5.1.1_3.0_1694508246464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_bert_e2_en_5.1.1_3.0_1694508246464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ssci_bert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_bert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_bert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-BERT-e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e4_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e4_en.md new file mode 100644 index 00000000000000..b64ed6bbf787a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_bert_e4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_bert_e4 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_bert_e4 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_bert_e4` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_bert_e4_en_5.1.1_3.0_1694508373404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_bert_e4_en_5.1.1_3.0_1694508373404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ssci_bert_e4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_bert_e4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_bert_e4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-BERT-e4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e2_en.md new file mode 100644 index 00000000000000..1e99d99f79b07a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_scibert_e2 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_scibert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_scibert_e2` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_scibert_e2_en_5.1.1_3.0_1694508512306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_scibert_e2_en_5.1.1_3.0_1694508512306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ssci_scibert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_scibert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_scibert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-SciBERT-e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e4_en.md b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e4_en.md new file mode 100644 index 00000000000000..092e2a997f643a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ssci_scibert_e4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ssci_scibert_e4 BertEmbeddings from KM4STfulltext +author: John Snow Labs +name: ssci_scibert_e4 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ssci_scibert_e4` is an English model originally trained by KM4STfulltext. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ssci_scibert_e4_en_5.1.1_3.0_1694508639778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ssci_scibert_e4_en_5.1.1_3.0_1694508639778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ssci_scibert_e4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ssci_scibert_e4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ssci_scibert_e4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/KM4STfulltext/SSCI-SciBERT-e4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-telugu_bertu_te.md b/docs/_posts/ahmedlone127/2023-09-12-telugu_bertu_te.md new file mode 100644 index 00000000000000..feb4cc314f2447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-telugu_bertu_te.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Telugu telugu_bertu BertEmbeddings from kuppuluri +author: John Snow Labs +name: telugu_bertu +date: 2023-09-12 +tags: [bert, te, open_source, fill_mask, onnx] +task: Embeddings +language: te +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `telugu_bertu` is a Telugu model originally trained by kuppuluri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/telugu_bertu_te_5.1.1_3.0_1694510680655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/telugu_bertu_te_5.1.1_3.0_1694510680655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("telugu_bertu","te") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("telugu_bertu", "te") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|telugu_bertu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|te| +|Size:|412.5 MB| + +## References + +https://huggingface.co/kuppuluri/telugu_bertu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-test_model_nws_en.md b/docs/_posts/ahmedlone127/2023-09-12-test_model_nws_en.md new file mode 100644 index 00000000000000..9c8ca16c8f2d54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-test_model_nws_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_model_nws BertEmbeddings from nws +author: John Snow Labs +name: test_model_nws +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_model_nws` is an English model originally trained by nws. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_model_nws_en_5.1.1_3.0_1694562663160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_model_nws_en_5.1.1_3.0_1694562663160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("test_model_nws","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_model_nws", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_model_nws| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/nws/test_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_bert_turkish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_bert_turkish_cased_en.md new file mode 100644 index 00000000000000..98cdccd4f0dfac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_bert_turkish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_bert_turkish_cased BertEmbeddings from uygarkurt +author: John Snow Labs +name: tiny_bert_turkish_cased +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_bert_turkish_cased` is an English model originally trained by uygarkurt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_bert_turkish_cased_en_5.1.1_3.0_1694509025179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_bert_turkish_cased_en_5.1.1_3.0_1694509025179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_bert_turkish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_bert_turkish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_bert_turkish_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|17.4 MB| + +## References + +https://huggingface.co/uygarkurt/tiny-bert-turkish-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_custom_tokenizer_en.md new file mode 100644 index 00000000000000..3ad78833fd3a6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_conll2003_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_conll2003_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_conll2003_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694548724810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_custom_tokenizer_en_5.1.1_3.0_1694548724810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_conll2003_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_conll2003_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_conll2003_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-conll2003-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_en.md new file mode 100644 index 00000000000000..c6797bc36f72fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_conll2003 BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_conll2003 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_conll2003` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_en_5.1.1_3.0_1694548620549.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_en_5.1.1_3.0_1694548620549.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_conll2003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_conll2003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_conll2003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_from_scratch_en.md new file mode 100644 index 00000000000000..7467ed42fd81a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_conll2003_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_conll2003_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_conll2003_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_conll2003_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555420238.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_conll2003_from_scratch_en_5.1.1_3.0_1694555420238.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_conll2003_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_conll2003_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_conll2003_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-conll2003-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_en.md new file mode 100644 index 00000000000000..3c2eeb688abfad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_cola_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_cola_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_cola_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694510104742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_en_5.1.1_3.0_1694510104742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_cola_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_cola_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_cola_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-cola-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..acaa7b9bf20156 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_cola_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_cola_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_cola_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561081439.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561081439.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_cola_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_cola_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_cola_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|17.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-cola-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..f74c75c1c82133 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561266748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561266748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_cola_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|17.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-cola-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..5b3b531154c3ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694510167051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_en_5.1.1_3.0_1694510167051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_mnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..7de7e773837c40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561453097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561453097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|23.0 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..963f638a4de5a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561530936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561530936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|22.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_en.md new file mode 100644 index 00000000000000..5fbfae07caa856 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mrpc_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mrpc_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mrpc_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694510364675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_custom_tokenizer_en_5.1.1_3.0_1694510364675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_mrpc_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mrpc_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mrpc_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mrpc-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..7d838dab658e0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561680337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561680337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mrpc_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mrpc-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..a997aa904cf11f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561983732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694561983732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_mrpc_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-mrpc-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..270d4f0f4faaad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694510431505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_en_5.1.1_3.0_1694510431505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..156a579db32ff1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562076733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562076733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|23.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..0e3fca53ddbf77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694551714525.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694551714525.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..478509e8a9a56e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562151869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562151869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|23.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_en.md new file mode 100644 index 00000000000000..6909097034f0a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qqp_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qqp_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qqp_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694510502065.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_en_5.1.1_3.0_1694510502065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qqp_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qqp_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qqp_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qqp-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..3cf412c1eaba1d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562240997.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562240997.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qqp_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|20.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qqp-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..2e3bb3e26abfc2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562311451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562311451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|20.2 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-qqp-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_en.md new file mode 100644 index 00000000000000..7246fe54c0f393 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694510577450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_en_5.1.1_3.0_1694510577450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_rte_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..09ab6e1ca7c962 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562388375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562388375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_rte_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|21.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_en.md new file mode 100644 index 00000000000000..05284e64932d0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_en_5.1.1_3.0_1694507941819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_en_5.1.1_3.0_1694507941819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tiny_mlm_glue_rte","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..e19640033c6f5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562556409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562556409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|21.2 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-rte-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_en.md new file mode 100644 index 00000000000000..ba54ab1e43eaac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_mlm_glue_sst2_custom_tokenizer` is a English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694510651635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_en_5.1.1_3.0_1694510651635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..f9dad70b89ea83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562631751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562631751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|20.1 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_en.md new file mode 100644 index 00000000000000..6933cb5f41fb70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2 BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_mlm_glue_sst2` is a English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_en_5.1.1_3.0_1694508015211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_en_5.1.1_3.0_1694508015211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..61d3b9de6cc152 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562828413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562828413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-sst2-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_en.md new file mode 100644 index 00000000000000..262b9d228c901d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_mlm_glue_stsb_custom_tokenizer` is a English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694510712289.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_en_5.1.1_3.0_1694510712289.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..33f2db9d861ce5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562907425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694562907425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.3 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_en.md new file mode 100644 index 00000000000000..f328d8d1a3cfcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_mlm_glue_stsb` is a English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_en_5.1.1_3.0_1694508086728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_en_5.1.1_3.0_1694508086728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..0bfa6281e1b19d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563003418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563003418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.1 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-stsb-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..88db108e7cdc39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_mlm_glue_wnli_custom_tokenizer` is a English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510789128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_en_5.1.1_3.0_1694510789128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..da84a639e3f14d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563181376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563181376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.9 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_en.md new file mode 100644 index 00000000000000..94cd0e4bf27200 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_glue_wnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tiny_mlm_glue_wnli` is a English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_en_5.1.1_3.0_1694508154889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_en_5.1.1_3.0_1694508154889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_imdb_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_imdb_from_scratch_en.md new file mode 100644 index 00000000000000..369e2feb274541 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_imdb_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_imdb_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_imdb_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_imdb_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556163748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_imdb_from_scratch_en_5.1.1_3.0_1694556163748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_imdb_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_imdb_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_imdb_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-imdb-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_custom_tokenizer_en.md new file mode 100644 index 00000000000000..d39c2419c31401 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_rotten_tomatoes_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_rotten_tomatoes_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_rotten_tomatoes_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694548541463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_custom_tokenizer_en_5.1.1_3.0_1694548541463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_rotten_tomatoes_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_rotten_tomatoes_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_rotten_tomatoes_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-rotten_tomatoes-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..12fa5aab55c4b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_rotten_tomatoes BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_rotten_tomatoes +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_rotten_tomatoes` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_en_5.1.1_3.0_1694548478733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_en_5.1.1_3.0_1694548478733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-rotten_tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_from_scratch_en.md new file mode 100644 index 00000000000000..6daac517dfb2b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_rotten_tomatoes_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_rotten_tomatoes_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_rotten_tomatoes_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_rotten_tomatoes_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694555692732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_rotten_tomatoes_from_scratch_en_5.1.1_3.0_1694555692732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_rotten_tomatoes_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_rotten_tomatoes_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_rotten_tomatoes_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-rotten_tomatoes-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_custom_tokenizer_en.md new file mode 100644 index 00000000000000..4ca9d57aca3214 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_snli_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_snli_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_snli_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549373032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_custom_tokenizer_en_5.1.1_3.0_1694549373032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_snli_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_snli_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_snli_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-snli-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_from_scratch_en.md new file mode 100644 index 00000000000000..c9ceda951d0c43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_snli_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_snli_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_snli_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_snli_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_from_scratch_en_5.1.1_3.0_1694555995371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_snli_from_scratch_en_5.1.1_3.0_1694555995371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_snli_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_snli_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_snli_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.4 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-snli-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_custom_tokenizer_en.md new file mode 100644 index 00000000000000..e0dcd2d50a5ab6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_squad_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_squad_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_squad_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547599498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_custom_tokenizer_en_5.1.1_3.0_1694547599498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_squad_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_squad_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_squad_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.5 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-squad-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_en.md new file mode 100644 index 00000000000000..44e83102346a03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_squad BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_squad +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_squad` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_en_5.1.1_3.0_1694547423091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_squad_en_5.1.1_3.0_1694547423091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_tweet_eval_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_tweet_eval_from_scratch_en.md new file mode 100644 index 00000000000000..3105b80e311ead --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_tweet_eval_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_tweet_eval_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_tweet_eval_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_tweet_eval_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555163619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_tweet_eval_from_scratch_en_5.1.1_3.0_1694555163619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_tweet_eval_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_tweet_eval_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_tweet_eval_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-tweet_eval-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_custom_tokenizer_en.md new file mode 100644 index 00000000000000..23a14f4c6ed1a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548410883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_custom_tokenizer_en_5.1.1_3.0_1694548410883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_en.md new file mode 100644 index 00000000000000..718a20122eed20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_en_5.1.1_3.0_1694548328415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_en_5.1.1_3.0_1694548328415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_custom_tokenizer_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_custom_tokenizer_en.md new file mode 100644 index 00000000000000..1afc42653b28cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_custom_tokenizer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext_from_scratch_custom_tokenizer BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext_from_scratch_custom_tokenizer +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext_from_scratch_custom_tokenizer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554282672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_custom_tokenizer_en_5.1.1_3.0_1694554282672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext_from_scratch_custom_tokenizer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext_from_scratch_custom_tokenizer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext_from_scratch_custom_tokenizer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext-from-scratch-custom-tokenizer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_en.md new file mode 100644 index 00000000000000..7e8d46e34f0bee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-tiny_mlm_wikitext_from_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_wikitext_from_scratch BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_wikitext_from_scratch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_wikitext_from_scratch` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694555916739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_wikitext_from_scratch_en_5.1.1_3.0_1694555916739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_wikitext_from_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_wikitext_from_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_wikitext_from_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-wikitext-from-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-topic_it_science_bert_en.md b/docs/_posts/ahmedlone127/2023-09-12-topic_it_science_bert_en.md new file mode 100644 index 00000000000000..7111c6abb236fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-topic_it_science_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English topic_it_science_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: topic_it_science_bert +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `topic_it_science_bert` is an English model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_it_science_bert_en_5.1.1_3.0_1694562722460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_it_science_bert_en_5.1.1_3.0_1694562722460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("topic_it_science_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("topic_it_science_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_it_science_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Kdogs/topic_IT-Science_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-transformer_nlp_en.md b/docs/_posts/ahmedlone127/2023-09-12-transformer_nlp_en.md new file mode 100644 index 00000000000000..f27af25c1bb53d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-transformer_nlp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English transformer_nlp BertEmbeddings from onon214 +author: John Snow Labs +name: transformer_nlp +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `transformer_nlp` is an English model originally trained by onon214. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/transformer_nlp_en_5.1.1_3.0_1694562498875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/transformer_nlp_en_5.1.1_3.0_1694562498875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("transformer_nlp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("transformer_nlp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|transformer_nlp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/onon214/transformer-NLP \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-twitch_bert_base_cased_pytorch_en.md b/docs/_posts/ahmedlone127/2023-09-12-twitch_bert_base_cased_pytorch_en.md new file mode 100644 index 00000000000000..da34a0b57d8d48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-twitch_bert_base_cased_pytorch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English twitch_bert_base_cased_pytorch BertEmbeddings from veb +author: John Snow Labs +name: twitch_bert_base_cased_pytorch +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `twitch_bert_base_cased_pytorch` is an English model originally trained by veb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twitch_bert_base_cased_pytorch_en_5.1.1_3.0_1694554479279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twitch_bert_base_cased_pytorch_en_5.1.1_3.0_1694554479279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("twitch_bert_base_cased_pytorch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("twitch_bert_base_cased_pytorch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twitch_bert_base_cased_pytorch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/veb/twitch-bert-base-cased-pytorch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e1_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e1_en.md new file mode 100644 index 00000000000000..dad45e9deeb701 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultbert_e1 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultbert_e1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultbert_e1` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultbert_e1_en_5.1.1_3.0_1694550516279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultbert_e1_en_5.1.1_3.0_1694550516279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultbert_e1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugmultbert_e1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultbert_e1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.4 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultBERT_e1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e2_en.md new file mode 100644 index 00000000000000..e4235ef45efb29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultbert_e2 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultbert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultbert_e2` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultbert_e2_en_5.1.1_3.0_1694551678403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultbert_e2_en_5.1.1_3.0_1694551678403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultbert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugmultbert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultbert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.2 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultBERT_e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e3_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e3_en.md new file mode 100644 index 00000000000000..63facb54a8d3a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultbert_e3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultbert_e3 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultbert_e3 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultbert_e3` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultbert_e3_en_5.1.1_3.0_1694551842232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultbert_e3_en_5.1.1_3.0_1694551842232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultbert_e3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugmultbert_e3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultbert_e3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.0 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultBERT_e3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugmultmtokbert_3e_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugmultmtokbert_3e_en.md new file mode 100644 index 00000000000000..0c2e7e90013903 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugmultmtokbert_3e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugmultmtokbert_3e BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugmultmtokbert_3e +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugmultmtokbert_3e` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugmultmtokbert_3e_en_5.1.1_3.0_1694551069053.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugmultmtokbert_3e_en_5.1.1_3.0_1694551069053.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugmultmtokbert_3e","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugmultmtokbert_3e", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugmultmtokbert_3e| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|742.6 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgMultMTokBERT_3e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e1_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e1_en.md new file mode 100644 index 00000000000000..841bbafc098785 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugturkbert_e1 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugturkbert_e1 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugturkbert_e1` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugturkbert_e1_en_5.1.1_3.0_1694551986988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugturkbert_e1_en_5.1.1_3.0_1694551986988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugturkbert_e1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugturkbert_e1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugturkbert_e1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.3 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgTurkBERT_e1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e2_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e2_en.md new file mode 100644 index 00000000000000..1cb2829bc16a32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugturkbert_e2 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugturkbert_e2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugturkbert_e2` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugturkbert_e2_en_5.1.1_3.0_1694552141789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugturkbert_e2_en_5.1.1_3.0_1694552141789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugturkbert_e2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugturkbert_e2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugturkbert_e2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.2 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgTurkBERT_e2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e3_en.md b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e3_en.md new file mode 100644 index 00000000000000..5d30aadeabb4c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-ugturkbert_e3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ugturkbert_e3 BertEmbeddings from TurkLangsTeamURFU +author: John Snow Labs +name: ugturkbert_e3 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ugturkbert_e3` is an English model originally trained by TurkLangsTeamURFU. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ugturkbert_e3_en_5.1.1_3.0_1694552327758.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ugturkbert_e3_en_5.1.1_3.0_1694552327758.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ugturkbert_e3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input column of BertEmbeddings below + +val embeddings = BertEmbeddings + .pretrained("ugturkbert_e3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ugturkbert_e3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.1 MB| + +## References + +https://huggingface.co/TurkLangsTeamURFU/UgTurkBERT_e3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-12-xbert2_en.md b/docs/_posts/ahmedlone127/2023-09-12-xbert2_en.md new file mode 100644 index 00000000000000..4f76afc566c76a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-12-xbert2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English xbert2 BertEmbeddings from tmc +author: John Snow Labs +name: xbert2 +date: 2023-09-12 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `xbert2` is an English model originally trained by tmc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xbert2_en_5.1.1_3.0_1694551885455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xbert2_en_5.1.1_3.0_1694551885455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("xbert2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("xbert2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xbert2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/tmc/xbert2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-2022_02_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-2022_02_10_en.md new file mode 100644 index 00000000000000..202256d38246f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-2022_02_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 2022_02_10 BertEmbeddings from wuyanzu +author: John Snow Labs +name: 2022_02_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `2022_02_10` is an English model originally trained by wuyanzu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/2022_02_10_en_5.1.1_3.0_1694586410513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/2022_02_10_en_5.1.1_3.0_1694586410513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("2022_02_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("2022_02_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|2022_02_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wuyanzu/2022_02_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-a_different_bert_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-a_different_bert_model_en.md new file mode 100644 index 00000000000000..491e2bbd31cf0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-a_different_bert_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English a_different_bert_model BertEmbeddings from bstad +author: John Snow Labs +name: a_different_bert_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `a_different_bert_model` is an English model originally trained by bstad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/a_different_bert_model_en_5.1.1_3.0_1694589202062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/a_different_bert_model_en_5.1.1_3.0_1694589202062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("a_different_bert_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("a_different_bert_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|a_different_bert_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/bstad/a-different-bert-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-aave_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-aave_bert_en.md new file mode 100644 index 00000000000000..ab0b5409aaef14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-aave_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English aave_bert BertEmbeddings from csalaam +author: John Snow Labs +name: aave_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `aave_bert` is an English model originally trained by csalaam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aave_bert_en_5.1.1_3.0_1694611678460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aave_bert_en_5.1.1_3.0_1694611678460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("aave_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("aave_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aave_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/csalaam/AAVE-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_1_en.md new file mode 100644 index 00000000000000..d0577d6f17857a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English absa_mlm_1 BertEmbeddings from UchihaMadara +author: John Snow Labs +name: absa_mlm_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `absa_mlm_1` is an English model originally trained by UchihaMadara. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/absa_mlm_1_en_5.1.1_3.0_1694579162814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/absa_mlm_1_en_5.1.1_3.0_1694579162814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("absa_mlm_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("absa_mlm_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|absa_mlm_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/UchihaMadara/absa-mlm-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_2_en.md new file mode 100644 index 00000000000000..bf605a09be386c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-absa_mlm_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English absa_mlm_2 BertEmbeddings from UchihaMadara +author: John Snow Labs +name: absa_mlm_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `absa_mlm_2` is an English model originally trained by UchihaMadara. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/absa_mlm_2_en_5.1.1_3.0_1694579368670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/absa_mlm_2_en_5.1.1_3.0_1694579368670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("absa_mlm_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("absa_mlm_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|absa_mlm_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/UchihaMadara/absa-mlm-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-adopted_bert_base_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-adopted_bert_base_cased_en.md new file mode 100644 index 00000000000000..5a74a536c75295 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-adopted_bert_base_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English adopted_bert_base_cased BertEmbeddings from sivanravid +author: John Snow Labs +name: adopted_bert_base_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `adopted_bert_base_cased` is an English model originally trained by sivanravid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/adopted_bert_base_cased_en_5.1.1_3.0_1694617850169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/adopted_bert_base_cased_en_5.1.1_3.0_1694617850169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("adopted_bert_base_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("adopted_bert_base_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|adopted_bert_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/sivanravid/adopted-bert-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-adrbert_base_p1_en.md b/docs/_posts/ahmedlone127/2023-09-13-adrbert_base_p1_en.md new file mode 100644 index 00000000000000..0caa21b7ce0587 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-adrbert_base_p1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English adrbert_base_p1 BertEmbeddings from adriansyahdr +author: John Snow Labs +name: adrbert_base_p1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `adrbert_base_p1` is an English model originally trained by adriansyahdr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/adrbert_base_p1_en_5.1.1_3.0_1694576788237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/adrbert_base_p1_en_5.1.1_3.0_1694576788237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("adrbert_base_p1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("adrbert_base_p1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|adrbert_base_p1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|470.1 MB| + +## References + +https://huggingface.co/adriansyahdr/adrBert-base-p1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-agriculture_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-agriculture_bert_uncased_en.md new file mode 100644 index 00000000000000..ab77f2d4f15401 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-agriculture_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English agriculture_bert_uncased BertEmbeddings from recobo +author: John Snow Labs +name: agriculture_bert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `agriculture_bert_uncased` is an English model originally trained by recobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/agriculture_bert_uncased_en_5.1.1_3.0_1694566700185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/agriculture_bert_uncased_en_5.1.1_3.0_1694566700185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("agriculture_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("agriculture_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|agriculture_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/recobo/agriculture-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ai12_en.md b/docs/_posts/ahmedlone127/2023-09-13-ai12_en.md new file mode 100644 index 00000000000000..ca5f33564417ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ai12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ai12 BertEmbeddings from zzecf +author: John Snow Labs +name: ai12 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai12` is an English model originally trained by zzecf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai12_en_5.1.1_3.0_1694599540494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai12_en_5.1.1_3.0_1694599540494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ai12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ai12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/zzecf/AI12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ai12nlp_en.md b/docs/_posts/ahmedlone127/2023-09-13-ai12nlp_en.md new file mode 100644 index 00000000000000..6f5f5eac6a1457 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ai12nlp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ai12nlp BertEmbeddings from zhizihuabai +author: John Snow Labs +name: ai12nlp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai12nlp` is an English model originally trained by zhizihuabai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai12nlp_en_5.1.1_3.0_1694593503233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai12nlp_en_5.1.1_3.0_1694593503233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ai12nlp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ai12nlp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai12nlp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/zhizihuabai/ai12nlp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ai12one_en.md b/docs/_posts/ahmedlone127/2023-09-13-ai12one_en.md new file mode 100644 index 00000000000000..416607b63561aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ai12one_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ai12one BertEmbeddings from zhizihuabai +author: John Snow Labs +name: ai12one +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai12one` is an English model originally trained by zhizihuabai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ai12one_en_5.1.1_3.0_1694593660881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ai12one_en_5.1.1_3.0_1694593660881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ai12one","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ai12one", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ai12one| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/zhizihuabai/ai12one \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-aivengers_bert_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-aivengers_bert_finetuned_en.md new file mode 100644 index 00000000000000..211ca83e5c0862 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-aivengers_bert_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English aivengers_bert_finetuned BertEmbeddings from dkqp +author: John Snow Labs +name: aivengers_bert_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `aivengers_bert_finetuned` is an English model originally trained by dkqp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aivengers_bert_finetuned_en_5.1.1_3.0_1694620043636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aivengers_bert_finetuned_en_5.1.1_3.0_1694620043636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("aivengers_bert_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("aivengers_bert_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aivengers_bert_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/dkqp/AiVENGERS_BERT_FineTuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-aivengers_multilingual_base_xx.md b/docs/_posts/ahmedlone127/2023-09-13-aivengers_multilingual_base_xx.md new file mode 100644 index 00000000000000..5d063928b4a497 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-aivengers_multilingual_base_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual aivengers_multilingual_base BertEmbeddings from kimjae +author: John Snow Labs +name: aivengers_multilingual_base +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aivengers_multilingual_base` is a Multilingual model originally trained by kimjae. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aivengers_multilingual_base_xx_5.1.1_3.0_1694632030712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aivengers_multilingual_base_xx_5.1.1_3.0_1694632030712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("aivengers_multilingual_base","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("aivengers_multilingual_base", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aivengers_multilingual_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/kimjae/aivengers_multilingual_base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_flax_community_xx.md b/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_flax_community_xx.md new file mode 100644 index 00000000000000..dc42e3991efcd5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_flax_community_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual alberti_bert_base_multilingual_cased_flax_community BertEmbeddings from flax-community +author: John Snow Labs +name: alberti_bert_base_multilingual_cased_flax_community +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alberti_bert_base_multilingual_cased_flax_community` is a Multilingual model originally trained by flax-community. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alberti_bert_base_multilingual_cased_flax_community_xx_5.1.1_3.0_1694642221069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alberti_bert_base_multilingual_cased_flax_community_xx_5.1.1_3.0_1694642221069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alberti_bert_base_multilingual_cased_flax_community","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alberti_bert_base_multilingual_cased_flax_community", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alberti_bert_base_multilingual_cased_flax_community| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|664.4 MB| + +## References + +https://huggingface.co/flax-community/alberti-bert-base-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_linhd_postdata_xx.md b/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_linhd_postdata_xx.md new file mode 100644 index 00000000000000..b1f4da96e304e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-alberti_bert_base_multilingual_cased_linhd_postdata_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual alberti_bert_base_multilingual_cased_linhd_postdata BertEmbeddings from linhd-postdata +author: John Snow Labs +name: alberti_bert_base_multilingual_cased_linhd_postdata +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alberti_bert_base_multilingual_cased_linhd_postdata` is a Multilingual model originally trained by linhd-postdata. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alberti_bert_base_multilingual_cased_linhd_postdata_xx_5.1.1_3.0_1694577289966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alberti_bert_base_multilingual_cased_linhd_postdata_xx_5.1.1_3.0_1694577289966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alberti_bert_base_multilingual_cased_linhd_postdata","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alberti_bert_base_multilingual_cased_linhd_postdata", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alberti_bert_base_multilingual_cased_linhd_postdata| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|664.4 MB| + +## References + +https://huggingface.co/linhd-postdata/alberti-bert-base-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_base_en.md new file mode 100644 index 00000000000000..3d4cf3b3466d62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English alephbertgimmel_base BertEmbeddings from dicta-il +author: John Snow Labs +name: alephbertgimmel_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `alephbertgimmel_base` is an English model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alephbertgimmel_base_en_5.1.1_3.0_1694594556756.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alephbertgimmel_base_en_5.1.1_3.0_1694594556756.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alephbertgimmel_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alephbertgimmel_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alephbertgimmel_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.4 MB| + +## References + +https://huggingface.co/dicta-il/alephbertgimmel-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_small_128_he.md b/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_small_128_he.md new file mode 100644 index 00000000000000..9a91bffe8d49c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_small_128_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew alephbertgimmel_small_128 BertEmbeddings from imvladikon +author: John Snow Labs +name: alephbertgimmel_small_128 +date: 2023-09-13 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alephbertgimmel_small_128` is a Hebrew model originally trained by imvladikon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alephbertgimmel_small_128_he_5.1.1_3.0_1694642025855.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alephbertgimmel_small_128_he_5.1.1_3.0_1694642025855.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alephbertgimmel_small_128","he") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alephbertgimmel_small_128", "he") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alephbertgimmel_small_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|295.5 MB| + +## References + +https://huggingface.co/imvladikon/alephbertgimmel-small-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_small_en.md b/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_small_en.md new file mode 100644 index 00000000000000..977afc12fb0680 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-alephbertgimmel_small_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English alephbertgimmel_small BertEmbeddings from dicta-il +author: John Snow Labs +name: alephbertgimmel_small +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `alephbertgimmel_small` is an English model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alephbertgimmel_small_en_5.1.1_3.0_1694594680695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alephbertgimmel_small_en_5.1.1_3.0_1694594680695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alephbertgimmel_small","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alephbertgimmel_small", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alephbertgimmel_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|295.5 MB| + +## References + +https://huggingface.co/dicta-il/alephbertgimmel-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_base_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_base_arabertv2_en.md new file mode 100644 index 00000000000000..46c5273fbbb754 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_base_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_base_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_base_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_base_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_base_arabertv2_en_5.1.1_3.0_1694566807785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_base_arabertv2_en_5.1.1_3.0_1694566807785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_base_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_base_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_base_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.8 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-base-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_large_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_large_arabertv2_en.md new file mode 100644 index 00000000000000..470ed97e8f00d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_large_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_large_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_large_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_large_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_large_arabertv2_en_5.1.1_3.0_1694566592236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_large_arabertv2_en_5.1.1_3.0_1694566592236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_large_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_large_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_base_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_base_arabertv2_en.md new file mode 100644 index 00000000000000..0ed7d52a89de75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_base_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_nonumber_base_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_nonumber_base_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_nonumber_base_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_base_arabertv2_en_5.1.1_3.0_1694567513179.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_base_arabertv2_en_5.1.1_3.0_1694567513179.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_nonumber_base_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_nonumber_base_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_nonumber_base_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.8 MB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-Nonumber-base-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_large_arabertv2_en.md b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_large_arabertv2_en.md new file mode 100644 index 00000000000000..1f5104e2a5d6ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-algarlegal_nonumber_large_arabertv2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English algarlegal_nonumber_large_arabertv2 BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: algarlegal_nonumber_large_arabertv2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `algarlegal_nonumber_large_arabertv2` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_large_arabertv2_en_5.1.1_3.0_1694567316136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/algarlegal_nonumber_large_arabertv2_en_5.1.1_3.0_1694567316136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("algarlegal_nonumber_large_arabertv2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("algarlegal_nonumber_large_arabertv2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|algarlegal_nonumber_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/hatemestinbejaia/AlgArLegal-Nonumber-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-all_minilm_l6_v2_finetuned_wikitext2_en.md b/docs/_posts/ahmedlone127/2023-09-13-all_minilm_l6_v2_finetuned_wikitext2_en.md new file mode 100644 index 00000000000000..f95a0dce3bab16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-all_minilm_l6_v2_finetuned_wikitext2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English all_minilm_l6_v2_finetuned_wikitext2 BertEmbeddings from Shimiao +author: John Snow Labs +name: all_minilm_l6_v2_finetuned_wikitext2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `all_minilm_l6_v2_finetuned_wikitext2` is an English model originally trained by Shimiao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/all_minilm_l6_v2_finetuned_wikitext2_en_5.1.1_3.0_1694600245310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/all_minilm_l6_v2_finetuned_wikitext2_en_5.1.1_3.0_1694600245310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("all_minilm_l6_v2_finetuned_wikitext2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("all_minilm_l6_v2_finetuned_wikitext2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|all_minilm_l6_v2_finetuned_wikitext2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|84.6 MB| + +## References + +https://huggingface.co/Shimiao/all-MiniLM-L6-v2-finetuned-wikitext2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-answer_model_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-answer_model_bert_base_uncased_en.md new file mode 100644 index 00000000000000..65cf8254efadbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-answer_model_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English answer_model_bert_base_uncased BertEmbeddings from Mayank393 +author: John Snow Labs +name: answer_model_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `answer_model_bert_base_uncased` is an English model originally trained by Mayank393. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/answer_model_bert_base_uncased_en_5.1.1_3.0_1694619524376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/answer_model_bert_base_uncased_en_5.1.1_3.0_1694619524376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("answer_model_bert_base_uncased","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("answer_model_bert_base_uncased", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|answer_model_bert_base_uncased|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|407.2 MB|
+
+## References
+
+https://huggingface.co/Mayank393/Answer_Model_Bert_Base_uncased
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-apachebertbasecase_en.md b/docs/_posts/ahmedlone127/2023-09-13-apachebertbasecase_en.md
new file mode 100644
index 00000000000000..fa72d217cb6063
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-apachebertbasecase_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English apachebertbasecase BertEmbeddings from EgilKarlsen
+author: John Snow Labs
+name: apachebertbasecase
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `apachebertbasecase` is an English model originally trained by EgilKarlsen.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/apachebertbasecase_en_5.1.1_3.0_1694572112065.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/apachebertbasecase_en_5.1.1_3.0_1694572112065.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("apachebertbasecase","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("apachebertbasecase", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|apachebertbasecase|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|403.6 MB|
+
+## References
+
+https://huggingface.co/EgilKarlsen/ApacheBertBaseCase
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-applicanttrackingsystembert_en.md b/docs/_posts/ahmedlone127/2023-09-13-applicanttrackingsystembert_en.md
new file mode 100644
index 00000000000000..7eb98f7748696d
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-applicanttrackingsystembert_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English applicanttrackingsystembert BertEmbeddings from Shushant
+author: John Snow Labs
+name: applicanttrackingsystembert
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `applicanttrackingsystembert` is an English model originally trained by Shushant.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/applicanttrackingsystembert_en_5.1.1_3.0_1694574178617.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/applicanttrackingsystembert_en_5.1.1_3.0_1694574178617.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("applicanttrackingsystembert","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("applicanttrackingsystembert", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|applicanttrackingsystembert|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|408.0 MB|
+
+## References
+
+https://huggingface.co/Shushant/ApplicantTrackingSystemBERT
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-ara_dialectbert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-ara_dialectbert_ar.md
new file mode 100644
index 00000000000000..4826748be7afc1
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-ara_dialectbert_ar.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: Arabic ara_dialectbert BertEmbeddings from MutazYoune
+author: John Snow Labs
+name: ara_dialectbert
+date: 2023-09-13
+tags: [bert, ar, open_source, fill_mask, onnx]
+task: Embeddings
+language: ar
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ara_dialectbert` is an Arabic model originally trained by MutazYoune.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ara_dialectbert_ar_5.1.1_3.0_1694567913022.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ara_dialectbert_ar_5.1.1_3.0_1694567913022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("ara_dialectbert","ar") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("ara_dialectbert", "ar")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|ara_dialectbert|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|ar|
+|Size:|406.3 MB|
+
+## References
+
+https://huggingface.co/MutazYoune/Ara_DialectBERT
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-arab_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-arab_bert_en.md
new file mode 100644
index 00000000000000..551c27abf4ba4f
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-arab_bert_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English arab_bert BertEmbeddings from MutazYoune
+author: John Snow Labs
+name: arab_bert
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arab_bert` is an English model originally trained by MutazYoune.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arab_bert_en_5.1.1_3.0_1694639723986.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arab_bert_en_5.1.1_3.0_1694639723986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("arab_bert","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("arab_bert", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|arab_bert|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|406.4 MB|
+
+## References
+
+https://huggingface.co/MutazYoune/ARAB_BERT
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-arabert_quran_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-arabert_quran_large_en.md
new file mode 100644
index 00000000000000..ef998da95b03a3
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-arabert_quran_large_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English arabert_quran_large BertEmbeddings from omarelsayeed
+author: John Snow Labs
+name: arabert_quran_large
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabert_quran_large` is an English model originally trained by omarelsayeed.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabert_quran_large_en_5.1.1_3.0_1694590848206.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabert_quran_large_en_5.1.1_3.0_1694590848206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("arabert_quran_large","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("arabert_quran_large", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|arabert_quran_large|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|1.4 GB|
+
+## References
+
+https://huggingface.co/omarelsayeed/arabert_quran_large
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-arabertautomodelformaskedlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-arabertautomodelformaskedlm_en.md
new file mode 100644
index 00000000000000..38570a5356f300
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-arabertautomodelformaskedlm_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English arabertautomodelformaskedlm BertEmbeddings from oknashar
+author: John Snow Labs
+name: arabertautomodelformaskedlm
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertautomodelformaskedlm` is an English model originally trained by oknashar.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertautomodelformaskedlm_en_5.1.1_3.0_1694615119192.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertautomodelformaskedlm_en_5.1.1_3.0_1694615119192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("arabertautomodelformaskedlm","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("arabertautomodelformaskedlm", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|arabertautomodelformaskedlm|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|504.6 MB|
+
+## References
+
+https://huggingface.co/oknashar/arabertAutoModelForMaskedLM
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-arabertmo_base_v10_en.md b/docs/_posts/ahmedlone127/2023-09-13-arabertmo_base_v10_en.md
new file mode 100644
index 00000000000000..47d65bda6cc454
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-arabertmo_base_v10_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English arabertmo_base_v10 BertEmbeddings from Ebtihal
+author: John Snow Labs
+name: arabertmo_base_v10
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabertmo_base_v10` is an English model originally trained by Ebtihal.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabertmo_base_v10_en_5.1.1_3.0_1694604536787.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabertmo_base_v10_en_5.1.1_3.0_1694604536787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("arabertmo_base_v10","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("arabertmo_base_v10", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|arabertmo_base_v10|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|407.9 MB|
+
+## References
+
+https://huggingface.co/Ebtihal/AraBertMo_base_V10
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-arabic_hd_en.md b/docs/_posts/ahmedlone127/2023-09-13-arabic_hd_en.md
new file mode 100644
index 00000000000000..c212d933f5b95f
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-arabic_hd_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English arabic_hd BertEmbeddings from avichr
+author: John Snow Labs
+name: arabic_hd
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arabic_hd` is an English model originally trained by avichr.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arabic_hd_en_5.1.1_3.0_1694584520930.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arabic_hd_en_5.1.1_3.0_1694584520930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("arabic_hd","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("arabic_hd", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|arabic_hd|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|412.0 MB|
+
+## References
+
+https://huggingface.co/avichr/ar_hd
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-arbert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-arbert_ar.md
new file mode 100644
index 00000000000000..e3174cd469c425
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-arbert_ar.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: Arabic arbert BertEmbeddings from UBC-NLP
+author: John Snow Labs
+name: arbert
+date: 2023-09-13
+tags: [bert, ar, open_source, fill_mask, onnx]
+task: Embeddings
+language: ar
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `arbert` is an Arabic model originally trained by UBC-NLP.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/arbert_ar_5.1.1_3.0_1694573787008.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/arbert_ar_5.1.1_3.0_1694573787008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("arbert","ar") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("arbert", "ar")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|arbert|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|ar|
+|Size:|605.3 MB|
+
+## References
+
+https://huggingface.co/UBC-NLP/ARBERT
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-archeobertje_en.md b/docs/_posts/ahmedlone127/2023-09-13-archeobertje_en.md
new file mode 100644
index 00000000000000..95c2c34d71ee75
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-archeobertje_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English archeobertje BertEmbeddings from alexbrandsen
+author: John Snow Labs
+name: archeobertje
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `archeobertje` is an English model originally trained by alexbrandsen.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/archeobertje_en_5.1.1_3.0_1694578129084.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/archeobertje_en_5.1.1_3.0_1694578129084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("archeobertje","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("archeobertje", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|archeobertje|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|en|
+|Size:|406.5 MB|
+
+## References
+
+https://huggingface.co/alexbrandsen/ArcheoBERTje
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-assamese_bert_as.md b/docs/_posts/ahmedlone127/2023-09-13-assamese_bert_as.md
new file mode 100644
index 00000000000000..e63db8e5e431b3
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-assamese_bert_as.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: Assamese assamese_bert BertEmbeddings from l3cube-pune
+author: John Snow Labs
+name: assamese_bert
+date: 2023-09-13
+tags: [bert, as, open_source, fill_mask, onnx]
+task: Embeddings
+language: as
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `assamese_bert` is an Assamese model originally trained by l3cube-pune.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/assamese_bert_as_5.1.1_3.0_1694642657748.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/assamese_bert_as_5.1.1_3.0_1694642657748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("assamese_bert","as") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("assamese_bert", "as")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+
+{:.model-param}
+## Model Information
+
+{:.table-model}
+|---|---|
+|Model Name:|assamese_bert|
+|Compatibility:|Spark NLP 5.1.1+|
+|License:|Open Source|
+|Edition:|Official|
+|Input Labels:|[documents, token]|
+|Output Labels:|[embeddings]|
+|Language:|as|
+|Size:|890.4 MB|
+
+## References
+
+https://huggingface.co/l3cube-pune/assamese-bert
\ No newline at end of file
diff --git a/docs/_posts/ahmedlone127/2023-09-13-author_identification_en.md b/docs/_posts/ahmedlone127/2023-09-13-author_identification_en.md
new file mode 100644
index 00000000000000..36ba5b6015a9f5
--- /dev/null
+++ b/docs/_posts/ahmedlone127/2023-09-13-author_identification_en.md
@@ -0,0 +1,93 @@
+---
+layout: model
+title: English author_identification BertEmbeddings from Omar2027
+author: John Snow Labs
+name: author_identification
+date: 2023-09-13
+tags: [bert, en, open_source, fill_mask, onnx]
+task: Embeddings
+language: en
+edition: Spark NLP 5.1.1
+spark_version: 3.0
+supported: true
+engine: onnx
+annotator: BertEmbeddings
+article_header:
+  type: cover
+use_language_switcher: "Python-Scala-Java"
+---
+
+## Description
+
+Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `author_identification` is an English model originally trained by Omar2027.
+
+{:.btn-box}
+<button class="button button-orange" disabled>Live Demo</button>
+<button class="button button-orange" disabled>Open in Colab</button>
+[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/author_identification_en_5.1.1_3.0_1694569409752.zip){:.button.button-orange.button-orange-trans.arr.button-icon}
+[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/author_identification_en_5.1.1_3.0_1694569409752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3}
+
+## How to use
+
+
+
+<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column read by BertEmbeddings
+
+embeddings = BertEmbeddings.pretrained("author_identification","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)  # data: DataFrame with a "text" column (not defined in this snippet)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column read by BertEmbeddings
+
+val embeddings = BertEmbeddings
+    .pretrained("author_identification", "en")
+    .setInputCols(Array("documents","token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data) // data: DataFrame with a "text" column (not defined in this snippet)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|author_identification| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/Omar2027/Author_identification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-azbert_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-azbert_base_en.md new file mode 100644 index 00000000000000..dec6b8e17faa32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-azbert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English azbert_base BertEmbeddings from castorini +author: John Snow Labs +name: azbert_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `azbert_base` is an English model originally trained by castorini. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/azbert_base_en_5.1.1_3.0_1694590742738.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/azbert_base_en_5.1.1_3.0_1694590742738.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("azbert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("azbert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|azbert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.7 MB| + +## References + +https://huggingface.co/castorini/azbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-b_fb_sms_lm_en.md b/docs/_posts/ahmedlone127/2023-09-13-b_fb_sms_lm_en.md new file mode 100644 index 00000000000000..bb4d7f3bdcbf69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-b_fb_sms_lm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English b_fb_sms_lm BertEmbeddings from adnankhawaja +author: John Snow Labs +name: b_fb_sms_lm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `b_fb_sms_lm` is an English model originally trained by adnankhawaja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/b_fb_sms_lm_en_5.1.1_3.0_1694638757732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/b_fb_sms_lm_en_5.1.1_3.0_1694638757732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("b_fb_sms_lm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("b_fb_sms_lm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|b_fb_sms_lm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/adnankhawaja/B_FB_SMS_LM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-b_t_fb_lm_en.md b/docs/_posts/ahmedlone127/2023-09-13-b_t_fb_lm_en.md new file mode 100644 index 00000000000000..2f4710361289f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-b_t_fb_lm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English b_t_fb_lm BertEmbeddings from adnankhawaja +author: John Snow Labs +name: b_t_fb_lm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `b_t_fb_lm` is an English model originally trained by adnankhawaja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/b_t_fb_lm_en_5.1.1_3.0_1694637241499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/b_t_fb_lm_en_5.1.1_3.0_1694637241499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("b_t_fb_lm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("b_t_fb_lm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|b_t_fb_lm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/adnankhawaja/B_T_FB_LM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-b_t_sms_lm_en.md b/docs/_posts/ahmedlone127/2023-09-13-b_t_sms_lm_en.md new file mode 100644 index 00000000000000..eb825775e44d97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-b_t_sms_lm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English b_t_sms_lm BertEmbeddings from adnankhawaja +author: John Snow Labs +name: b_t_sms_lm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `b_t_sms_lm` is an English model originally trained by adnankhawaja. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/b_t_sms_lm_en_5.1.1_3.0_1694638241378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/b_t_sms_lm_en_5.1.1_3.0_1694638241378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("b_t_sms_lm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("b_t_sms_lm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|b_t_sms_lm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/adnankhawaja/B_T_SMS_LM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-backbone_bertnsp_600_en.md b/docs/_posts/ahmedlone127/2023-09-13-backbone_bertnsp_600_en.md new file mode 100644 index 00000000000000..246af4e1869171 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-backbone_bertnsp_600_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English backbone_bertnsp_600 BertEmbeddings from approach0 +author: John Snow Labs +name: backbone_bertnsp_600 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `backbone_bertnsp_600` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/backbone_bertnsp_600_en_5.1.1_3.0_1694618178947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/backbone_bertnsp_600_en_5.1.1_3.0_1694618178947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("backbone_bertnsp_600","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("backbone_bertnsp_600", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|backbone_bertnsp_600| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/backbone-bertnsp-600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-backbone_cocomae_600_en.md b/docs/_posts/ahmedlone127/2023-09-13-backbone_cocomae_600_en.md new file mode 100644 index 00000000000000..ab45a99b6dd697 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-backbone_cocomae_600_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English backbone_cocomae_600 BertEmbeddings from approach0 +author: John Snow Labs +name: backbone_cocomae_600 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `backbone_cocomae_600` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/backbone_cocomae_600_en_5.1.1_3.0_1694617810215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/backbone_cocomae_600_en_5.1.1_3.0_1694617810215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("backbone_cocomae_600","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("backbone_cocomae_600", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|backbone_cocomae_600| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/backbone-cocomae-600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-backbone_cocondenser_600_en.md b/docs/_posts/ahmedlone127/2023-09-13-backbone_cocondenser_600_en.md new file mode 100644 index 00000000000000..9756e971664562 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-backbone_cocondenser_600_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English backbone_cocondenser_600 BertEmbeddings from approach0 +author: John Snow Labs +name: backbone_cocondenser_600 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `backbone_cocondenser_600` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/backbone_cocondenser_600_en_5.1.1_3.0_1694618588688.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/backbone_cocondenser_600_en_5.1.1_3.0_1694618588688.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("backbone_cocondenser_600","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("backbone_cocondenser_600", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|backbone_cocondenser_600| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/backbone-cocondenser-600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-backbone_cotmae_600_en.md b/docs/_posts/ahmedlone127/2023-09-13-backbone_cotmae_600_en.md new file mode 100644 index 00000000000000..4dfcb19b4d69c4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-backbone_cotmae_600_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English backbone_cotmae_600 BertEmbeddings from approach0 +author: John Snow Labs +name: backbone_cotmae_600 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `backbone_cotmae_600` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/backbone_cotmae_600_en_5.1.1_3.0_1694619025139.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/backbone_cotmae_600_en_5.1.1_3.0_1694619025139.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("backbone_cotmae_600","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("backbone_cotmae_600", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|backbone_cotmae_600| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/backbone-cotmae-600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_bn.md b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_bn.md new file mode 100644 index 00000000000000..fa3ca966eae826 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_bn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Bengali bangla_bert_base BertEmbeddings from sagorsarker +author: John Snow Labs +name: bangla_bert_base +date: 2023-09-13 +tags: [bert, bn, open_source, fill_mask, onnx] +task: Embeddings +language: bn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bangla_bert_base` is a Bengali model originally trained by sagorsarker. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bangla_bert_base_bn_5.1.1_3.0_1694569397272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bangla_bert_base_bn_5.1.1_3.0_1694569397272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bangla_bert_base","bn") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bangla_bert_base", "bn") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bangla_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|bn| +|Size:|614.7 MB| + +## References + +https://huggingface.co/sagorsarker/bangla-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_finetuned_tweets_en.md b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_finetuned_tweets_en.md new file mode 100644 index 00000000000000..b987da576ab2c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_base_finetuned_tweets_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bangla_bert_base_finetuned_tweets BertEmbeddings from myahan007 +author: John Snow Labs +name: bangla_bert_base_finetuned_tweets +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bangla_bert_base_finetuned_tweets` is an English model originally trained by myahan007. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bangla_bert_base_finetuned_tweets_en_5.1.1_3.0_1694589153688.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bangla_bert_base_finetuned_tweets_en_5.1.1_3.0_1694589153688.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bangla_bert_base_finetuned_tweets","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bangla_bert_base_finetuned_tweets", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bangla_bert_base_finetuned_tweets| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|613.8 MB| + +## References + +https://huggingface.co/myahan007/bangla-bert-base-finetuned-tweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_bn.md b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_bn.md new file mode 100644 index 00000000000000..33c51b5cdcf1ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bangla_bert_bn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Bengali bangla_bert BertEmbeddings from Kowsher +author: John Snow Labs +name: bangla_bert +date: 2023-09-13 +tags: [bert, bn, open_source, fill_mask, onnx] +task: Embeddings +language: bn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bangla_bert` is a Bengali model originally trained by Kowsher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bangla_bert_bn_5.1.1_3.0_1694564809999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bangla_bert_bn_5.1.1_3.0_1694564809999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bangla_bert","bn") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bangla_bert", "bn") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bangla_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|bn| +|Size:|612.1 MB| + +## References + +https://huggingface.co/Kowsher/bangla-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batterybert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batterybert_cased_en.md new file mode 100644 index 00000000000000..5ffeec3a3bcb64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batterybert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batterybert_cased BertEmbeddings from batterydata +author: John Snow Labs +name: batterybert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batterybert_cased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batterybert_cased_en_5.1.1_3.0_1694585277909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batterybert_cased_en_5.1.1_3.0_1694585277909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("batterybert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("batterybert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batterybert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/batterydata/batterybert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batterybert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batterybert_uncased_en.md new file mode 100644 index 00000000000000..17256679f12a65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batterybert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batterybert_uncased BertEmbeddings from batterydata +author: John Snow Labs +name: batterybert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batterybert_uncased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batterybert_uncased_en_5.1.1_3.0_1694585447411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batterybert_uncased_en_5.1.1_3.0_1694585447411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("batterybert_uncased", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("batterybert_uncased", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batterybert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/batterydata/batterybert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batteryonlybert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batteryonlybert_cased_en.md new file mode 100644 index 00000000000000..10374191eff46a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batteryonlybert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batteryonlybert_cased BertEmbeddings from batterydata +author: John Snow Labs +name: batteryonlybert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batteryonlybert_cased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batteryonlybert_cased_en_5.1.1_3.0_1694600323117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batteryonlybert_cased_en_5.1.1_3.0_1694600323117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("batteryonlybert_cased", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("batteryonlybert_cased", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batteryonlybert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/batterydata/batteryonlybert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batteryonlybert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batteryonlybert_uncased_en.md new file mode 100644 index 00000000000000..82aee1dcea6004 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batteryonlybert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batteryonlybert_uncased BertEmbeddings from batterydata +author: John Snow Labs +name: batteryonlybert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batteryonlybert_uncased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batteryonlybert_uncased_en_5.1.1_3.0_1694600650112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batteryonlybert_uncased_en_5.1.1_3.0_1694600650112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("batteryonlybert_uncased", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("batteryonlybert_uncased", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batteryonlybert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/batterydata/batteryonlybert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batteryscibert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batteryscibert_cased_en.md new file mode 100644 index 00000000000000..f08dbd892aff21 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batteryscibert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batteryscibert_cased BertEmbeddings from batterydata +author: John Snow Labs +name: batteryscibert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batteryscibert_cased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batteryscibert_cased_en_5.1.1_3.0_1694585574709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batteryscibert_cased_en_5.1.1_3.0_1694585574709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("batteryscibert_cased", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("batteryscibert_cased", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batteryscibert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/batterydata/batteryscibert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-batteryscibert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-batteryscibert_uncased_en.md new file mode 100644 index 00000000000000..f96644f9fe74c2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-batteryscibert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English batteryscibert_uncased BertEmbeddings from batterydata +author: John Snow Labs +name: batteryscibert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `batteryscibert_uncased` is an English model originally trained by batterydata. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/batteryscibert_uncased_en_5.1.1_3.0_1694585738175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/batteryscibert_uncased_en_5.1.1_3.0_1694585738175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("batteryscibert_uncased", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("batteryscibert_uncased", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|batteryscibert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/batterydata/batteryscibert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bbc_gqa_eval_en.md b/docs/_posts/ahmedlone127/2023-09-13-bbc_gqa_eval_en.md new file mode 100644 index 00000000000000..8be4e06e21cb70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bbc_gqa_eval_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bbc_gqa_eval BertEmbeddings from rjbownes +author: John Snow Labs +name: bbc_gqa_eval +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bbc_gqa_eval` is an English model originally trained by rjbownes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bbc_gqa_eval_en_5.1.1_3.0_1694568579505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bbc_gqa_eval_en_5.1.1_3.0_1694568579505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("bbc_gqa_eval", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("bbc_gqa_eval", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bbc_gqa_eval| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/rjbownes/BBC-GQA-eval \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-beauty_base_klcp2_en.md b/docs/_posts/ahmedlone127/2023-09-13-beauty_base_klcp2_en.md new file mode 100644 index 00000000000000..c9e768c152c595 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-beauty_base_klcp2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English beauty_base_klcp2 BertEmbeddings from Kyoungmin +author: John Snow Labs +name: beauty_base_klcp2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beauty_base_klcp2` is an English model originally trained by Kyoungmin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beauty_base_klcp2_en_5.1.1_3.0_1694565176499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beauty_base_klcp2_en_5.1.1_3.0_1694565176499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("beauty_base_klcp2", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("beauty_base_klcp2", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beauty_base_klcp2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/Kyoungmin/beauty-base-KLCP2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bengali_bert_bn.md b/docs/_posts/ahmedlone127/2023-09-13-bengali_bert_bn.md new file mode 100644 index 00000000000000..0ae5fbf956517b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bengali_bert_bn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Bengali bengali_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: bengali_bert +date: 2023-09-13 +tags: [bert, bn, open_source, fill_mask, onnx] +task: Embeddings +language: bn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bengali_bert` is a Bengali model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bengali_bert_bn_5.1.1_3.0_1694644522311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bengali_bert_bn_5.1.1_3.0_1694644522311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("bengali_bert", "bn") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("bengali_bert", "bn")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bengali_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|bn| +|Size:|890.5 MB| + +## References + +https://huggingface.co/l3cube-pune/bengali-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-berdou_200k_en.md b/docs/_posts/ahmedlone127/2023-09-13-berdou_200k_en.md new file mode 100644 index 00000000000000..6dcb474ef98322 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-berdou_200k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English berdou_200k BertEmbeddings from flavio-nakasato +author: John Snow Labs +name: berdou_200k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berdou_200k` is an English model originally trained by flavio-nakasato. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berdou_200k_en_5.1.1_3.0_1694641173323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berdou_200k_en_5.1.1_3.0_1694641173323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("berdou_200k", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("berdou_200k", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berdou_200k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/flavio-nakasato/berdou_200k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-berdou_500k_en.md b/docs/_posts/ahmedlone127/2023-09-13-berdou_500k_en.md new file mode 100644 index 00000000000000..5e89f0cbc0081d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-berdou_500k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English berdou_500k BertEmbeddings from flavio-nakasato +author: John Snow Labs +name: berdou_500k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berdou_500k` is an English model originally trained by flavio-nakasato. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berdou_500k_en_5.1.1_3.0_1694641766005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berdou_500k_en_5.1.1_3.0_1694641766005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("berdou_500k", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("berdou_500k", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berdou_500k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/flavio-nakasato/berdou_500k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-berel_2.0_he.md b/docs/_posts/ahmedlone127/2023-09-13-berel_2.0_he.md new file mode 100644 index 00000000000000..5ffbe53c0946db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-berel_2.0_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew berel_2.0 BertEmbeddings from dicta-il +author: John Snow Labs +name: berel_2.0 +date: 2023-09-13 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berel_2.0` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_2.0_he_5.1.1_3.0_1694589736279.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_2.0_he_5.1.1_3.0_1694589736279.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("berel_2.0", "he") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("berel_2.0", "he")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_2.0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|690.0 MB| + +## References + +https://huggingface.co/dicta-il/BEREL_2.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-berel_dicta_il_he.md b/docs/_posts/ahmedlone127/2023-09-13-berel_dicta_il_he.md new file mode 100644 index 00000000000000..ea270841690679 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-berel_dicta_il_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew berel_dicta_il BertEmbeddings from dicta-il +author: John Snow Labs +name: berel_dicta_il +date: 2023-09-13 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berel_dicta_il` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_dicta_il_he_5.1.1_3.0_1694589503563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_dicta_il_he_5.1.1_3.0_1694589503563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("berel_dicta_il", "he") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("berel_dicta_il", "he")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_dicta_il| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|690.1 MB| + +## References + +https://huggingface.co/dicta-il/BEREL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-berel_sivan22_he.md b/docs/_posts/ahmedlone127/2023-09-13-berel_sivan22_he.md new file mode 100644 index 00000000000000..5f965cde2c7c69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-berel_sivan22_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew berel_sivan22 BertEmbeddings from sivan22 +author: John Snow Labs +name: berel_sivan22 +date: 2023-09-13 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `berel_sivan22` is a Hebrew model originally trained by sivan22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/berel_sivan22_he_5.1.1_3.0_1694564843869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/berel_sivan22_he_5.1.1_3.0_1694564843869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Convert the raw "text" column of `data` into document annotations.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("berel_sivan22", "he") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+```
+```scala
+// Convert the raw "text" column of `data` into document annotations.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// BertEmbeddings reads a "token" column, so a Tokenizer stage must produce it.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+    .pretrained("berel_sivan22", "he")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|berel_sivan22| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|690.1 MB| + +## References + +https://huggingface.co/sivan22/BEREL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_120_en.md new file mode 100644 index 00000000000000..3eef4b14c03791 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_120 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_120` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_120_en_5.1.1_3.0_1694585728673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_120_en_5.1.1_3.0_1694585728673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/gokuls/bert_base_120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_24_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_24_en.md new file mode 100644 index 00000000000000..2c9ff0193ede01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_24_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_24 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_24 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_24` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_24_en_5.1.1_3.0_1694580140710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_24_en_5.1.1_3.0_1694580140710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_24","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_24", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_24| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/gokuls/bert_base_24 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_48_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_48_en.md new file mode 100644 index 00000000000000..90b6f1e13f7ff3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_48_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_48 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_48 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_48` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_48_en_5.1.1_3.0_1694580489996.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_48_en_5.1.1_3.0_1694580489996.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_48","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_48", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_48| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/gokuls/bert_base_48 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_5lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_5lang_cased_xx.md new file mode 100644 index 00000000000000..61728a0b5a0df5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_5lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_5lang_cased BertEmbeddings from amine +author: John Snow Labs +name: bert_base_5lang_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_5lang_cased` is a Multilingual model originally trained by amine. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_5lang_cased_xx_5.1.1_3.0_1694579745976.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_5lang_cased_xx_5.1.1_3.0_1694579745976.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_5lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_5lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_5lang_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|461.1 MB| + +## References + +https://huggingface.co/amine/bert-base-5lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_72_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_72_en.md new file mode 100644 index 00000000000000..05ade8a445f4af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_72_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_72 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_72 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_72` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_72_en_5.1.1_3.0_1694581572483.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_72_en_5.1.1_3.0_1694581572483.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_72","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_72", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_72| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/gokuls/bert_base_72 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_96_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_96_en.md new file mode 100644 index 00000000000000..866e65f7f7d285 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_96_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_96 BertEmbeddings from gokuls +author: John Snow Labs +name: bert_base_96 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_96` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_96_en_5.1.1_3.0_1694581741068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_96_en_5.1.1_3.0_1694581741068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_96","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_96", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_96| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/gokuls/bert_base_96 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabert_ar.md new file mode 100644 index 00000000000000..5c99d91275cbaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabert BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabert` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabert_ar_5.1.1_3.0_1694582655825.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabert_ar_5.1.1_3.0_1694582655825.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.6 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv01_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv01_ar.md new file mode 100644 index 00000000000000..cfed471a9af0b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv01_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabertv01 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabertv01 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabertv01` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv01_ar_5.1.1_3.0_1694582857463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv01_ar_5.1.1_3.0_1694582857463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabertv01","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv01", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|505.0 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabertv01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_ar.md new file mode 100644 index 00000000000000..67d9bda6177435 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabertv02 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabertv02 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabertv02` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_ar_5.1.1_3.0_1694583153678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_ar_5.1.1_3.0_1694583153678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabertv02","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv02", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|505.1 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabertv02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_twitter_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_twitter_ar.md new file mode 100644 index 00000000000000..7b2ce7d378f205 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv02_twitter_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabertv02_twitter BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabertv02_twitter +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabertv02_twitter` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_twitter_ar_5.1.1_3.0_1694583024700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_twitter_ar_5.1.1_3.0_1694583024700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabertv02_twitter","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv02_twitter", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv02_twitter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|505.0 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabertv02-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv2_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv2_ar.md new file mode 100644 index 00000000000000..df33f02fcf9d63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabertv2_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabertv2 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_base_arabertv2 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabertv2` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv2_ar_5.1.1_3.0_1694583313889.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv2_ar_5.1.1_3.0_1694583313889.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabertv2","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv2", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.8 MB| + +## References + +https://huggingface.co/aubmindlab/bert-base-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabic_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabic_ar.md new file mode 100644 index 00000000000000..442db9215c95da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabic_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_arabic BertEmbeddings from asafaya +author: John Snow Labs +name: bert_base_arabic +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabic` is an Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabic_ar_5.1.1_3.0_1694581997506.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabic_ar_5.1.1_3.0_1694581997506.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabic","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabic", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|412.0 MB| + +## References + +https://huggingface.co/asafaya/bert-base-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabic_miner_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabic_miner_en.md new file mode 100644 index 00000000000000..cae8a1248a1c4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arabic_miner_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_arabic_miner BertEmbeddings from giganticode +author: John Snow Labs +name: bert_base_arabic_miner +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arabic_miner` is an English model originally trained by giganticode. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabic_miner_en_5.1.1_3.0_1694649546700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabic_miner_en_5.1.1_3.0_1694649546700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_arabic_miner","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabic_miner", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabic_miner| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/giganticode/bert-base-ar_miner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_arapoembert_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arapoembert_en.md new file mode 100644 index 00000000000000..c4614ae96da625 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_arapoembert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_arapoembert BertEmbeddings from faisalq +author: John Snow Labs +name: bert_base_arapoembert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_arapoembert` is an English model originally trained by faisalq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arapoembert_en_5.1.1_3.0_1694614180429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arapoembert_en_5.1.1_3.0_1694614180429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_arapoembert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arapoembert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arapoembert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/faisalq/bert-base-arapoembert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_10_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_10_mlm_en.md new file mode 100644 index 00000000000000..bf002b47398ea9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_10_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_10_mlm BertEmbeddings from rithwik-db +author: John Snow Labs +name: bert_base_cased_10_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_10_mlm` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_10_mlm_en_5.1.1_3.0_1694615520000.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_10_mlm_en_5.1.1_3.0_1694615520000.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_10_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_10_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_10_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/rithwik-db/bert-base-cased-10-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_500_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_500_mlm_en.md new file mode 100644 index 00000000000000..aaefbd47534a47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_500_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_500_mlm BertEmbeddings from rithwik-db +author: John Snow Labs +name: bert_base_cased_500_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_500_mlm` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_500_mlm_en_5.1.1_3.0_1694616384008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_500_mlm_en_5.1.1_3.0_1694616384008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_500_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_500_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_500_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/rithwik-db/bert-base-cased-500-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_50_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_50_mlm_en.md new file mode 100644 index 00000000000000..5987f965b1449e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_50_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_50_mlm BertEmbeddings from rithwik-db +author: John Snow Labs +name: bert_base_cased_50_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_50_mlm` is an English model originally trained by rithwik-db. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_50_mlm_en_5.1.1_3.0_1694616027167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_50_mlm_en_5.1.1_3.0_1694616027167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_50_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_50_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_50_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/rithwik-db/bert-base-cased-50-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_b4h7_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_b4h7_en.md new file mode 100644 index 00000000000000..aaae0d23b50f37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_b4h7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_b4h7 BertEmbeddings from mdroth +author: John Snow Labs +name: bert_base_cased_b4h7 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_b4h7` is an English model originally trained by mdroth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_b4h7_en_5.1.1_3.0_1694626233588.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_b4h7_en_5.1.1_3.0_1694626233588.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_b4h7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_b4h7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_b4h7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/mdroth/bert-base-cased_B4H7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_bert_yoga_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_bert_yoga_finetuned_en.md new file mode 100644 index 00000000000000..e9283fb3e215af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_bert_yoga_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_bert_yoga_finetuned BertEmbeddings from dsantistevan +author: John Snow Labs +name: bert_base_cased_bert_yoga_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_bert_yoga_finetuned` is an English model originally trained by dsantistevan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_bert_yoga_finetuned_en_5.1.1_3.0_1694566346146.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_bert_yoga_finetuned_en_5.1.1_3.0_1694566346146.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_bert_yoga_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_bert_yoga_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_bert_yoga_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/dsantistevan/bert-base-cased-bert-yoga-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_bert_auto7_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_bert_auto7_en.md new file mode 100644 index 00000000000000..348063868c202e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_bert_auto7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_bert_auto7 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_cased_finetuned_bert_auto7 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_bert_auto7` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_bert_auto7_en_5.1.1_3.0_1694639557369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_bert_auto7_en_5.1.1_3.0_1694639557369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_bert_auto7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_bert_auto7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_bert_auto7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-cased-finetuned-bert-auto7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_bert_mlm5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_bert_mlm5_en.md new file mode 100644 index 00000000000000..adbe9eb0786dbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_bert_mlm5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_bert_mlm5 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_cased_finetuned_bert_mlm5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_bert_mlm5` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_bert_mlm5_en_5.1.1_3.0_1694642519725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_bert_mlm5_en_5.1.1_3.0_1694642519725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_bert_mlm5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_bert_mlm5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_bert_mlm5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-cased-finetuned-BERT-mlm5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..fbe7bb23bb5b01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_imdb BertEmbeddings from TimShieh +author: John Snow Labs +name: bert_base_cased_finetuned_imdb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_imdb` is an English model originally trained by TimShieh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_imdb_en_5.1.1_3.0_1694580692039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_imdb_en_5.1.1_3.0_1694580692039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/TimShieh/bert-base-cased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2010_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2010_mlm_en.md new file mode 100644 index 00000000000000..6838dc89fc1dce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2010_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_semeval2010_mlm BertEmbeddings from TimShieh +author: John Snow Labs +name: bert_base_cased_finetuned_semeval2010_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_semeval2010_mlm` is an English model originally trained by TimShieh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_semeval2010_mlm_en_5.1.1_3.0_1694581084119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_semeval2010_mlm_en_5.1.1_3.0_1694581084119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_semeval2010_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_semeval2010_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_semeval2010_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/TimShieh/bert-base-cased-finetuned-semeval2010-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2017_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2017_mlm_en.md new file mode 100644 index 00000000000000..14698d2b675138 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_finetuned_semeval2017_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_semeval2017_mlm BertEmbeddings from TimShieh +author: John Snow Labs +name: bert_base_cased_finetuned_semeval2017_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_semeval2017_mlm` is an English model originally trained by TimShieh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_semeval2017_mlm_en_5.1.1_3.0_1694580947575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_semeval2017_mlm_en_5.1.1_3.0_1694580947575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_semeval2017_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_semeval2017_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_semeval2017_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/TimShieh/bert-base-cased-finetuned-semeval2017-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_googlere_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_googlere_en.md new file mode 100644 index 00000000000000..f8d9e5f2f87878 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_googlere_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_googlere BertEmbeddings from triet1102 +author: John Snow Labs +name: bert_base_cased_googlere +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_googlere` is an English model originally trained by triet1102. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_googlere_en_5.1.1_3.0_1694614086575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_googlere_en_5.1.1_3.0_1694614086575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_googlere","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_googlere", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_googlere| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/triet1102/bert-base-cased-GoogleRE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_googlere_masked_subj_obj_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_googlere_masked_subj_obj_en.md new file mode 100644 index 00000000000000..a2a1a54c42a41e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_googlere_masked_subj_obj_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_googlere_masked_subj_obj BertEmbeddings from triet1102 +author: John Snow Labs +name: bert_base_cased_googlere_masked_subj_obj +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_googlere_masked_subj_obj` is an English model originally trained by triet1102. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_googlere_masked_subj_obj_en_5.1.1_3.0_1694615522376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_googlere_masked_subj_obj_en_5.1.1_3.0_1694615522376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_googlere_masked_subj_obj","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_googlere_masked_subj_obj", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_googlere_masked_subj_obj| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/triet1102/bert-base-cased-GoogleRE-masked-subj-obj \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_model_attribution_challenge_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_model_attribution_challenge_en.md new file mode 100644 index 00000000000000..c80b08154ec392 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_model_attribution_challenge_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_model_attribution_challenge BertEmbeddings from model-attribution-challenge +author: John Snow Labs +name: bert_base_cased_model_attribution_challenge +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_model_attribution_challenge` is an English model originally trained by model-attribution-challenge.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_model_attribution_challenge_en_5.1.1_3.0_1694627672776.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_model_attribution_challenge_en_5.1.1_3.0_1694627672776.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_model_attribution_challenge","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_model_attribution_challenge", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_model_attribution_challenge| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/model-attribution-challenge/bert-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_c_corpus_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_c_corpus_en.md new file mode 100644 index 00000000000000..deb118ac14cd10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_c_corpus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_portuguese_c_corpus BertEmbeddings from rosimeirecosta +author: John Snow Labs +name: bert_base_cased_portuguese_c_corpus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_c_corpus` is an English model originally trained by rosimeirecosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_c_corpus_en_5.1.1_3.0_1694648701939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_c_corpus_en_5.1.1_3.0_1694648701939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_c_corpus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_c_corpus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_c_corpus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/rosimeirecosta/bert-base-cased-pt-c-corpus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_alynneoya_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_alynneoya_en.md new file mode 100644 index 00000000000000..c2afc25f465d07 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_alynneoya_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_portuguese_lenerbr_alynneoya BertEmbeddings from alynneoya +author: John Snow Labs +name: bert_base_cased_portuguese_lenerbr_alynneoya +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_lenerbr_alynneoya` is an English model originally trained by alynneoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_alynneoya_en_5.1.1_3.0_1694611495066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_alynneoya_en_5.1.1_3.0_1694611495066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_lenerbr_alynneoya","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_lenerbr_alynneoya", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_lenerbr_alynneoya| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/alynneoya/bert-base-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_luciolrv_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_luciolrv_en.md new file mode 100644 index 00000000000000..494def19cc4d4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_luciolrv_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_portuguese_lenerbr_luciolrv BertEmbeddings from luciolrv +author: John Snow Labs +name: bert_base_cased_portuguese_lenerbr_luciolrv +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_lenerbr_luciolrv` is an English model originally trained by luciolrv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_luciolrv_en_5.1.1_3.0_1694588906119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_luciolrv_en_5.1.1_3.0_1694588906119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_lenerbr_luciolrv","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_lenerbr_luciolrv", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_lenerbr_luciolrv| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/luciolrv/bert-base-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_pt.md new file mode 100644 index 00000000000000..8335847887a8d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_cased_portuguese_lenerbr BertEmbeddings from pierreguillou +author: John Snow Labs +name: bert_base_cased_portuguese_lenerbr +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_lenerbr` is a Portuguese model originally trained by pierreguillou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563345437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563345437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_lenerbr","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_lenerbr", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_lenerbr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/pierreguillou/bert-base-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_vittorio_girardi_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_vittorio_girardi_en.md new file mode 100644 index 00000000000000..bc36d06fbffd5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_cased_portuguese_lenerbr_vittorio_girardi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_portuguese_lenerbr_vittorio_girardi BertEmbeddings from vittorio-girardi +author: John Snow Labs +name: bert_base_cased_portuguese_lenerbr_vittorio_girardi +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_portuguese_lenerbr_vittorio_girardi` is an English model originally trained by vittorio-girardi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_vittorio_girardi_en_5.1.1_3.0_1694634389905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_portuguese_lenerbr_vittorio_girardi_en_5.1.1_3.0_1694634389905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_portuguese_lenerbr_vittorio_girardi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_portuguese_lenerbr_vittorio_girardi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_portuguese_lenerbr_vittorio_girardi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/vittorio-girardi/bert-base-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_chinese_complaint_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_chinese_complaint_128_en.md new file mode 100644 index 00000000000000..c6536d7c5700a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_chinese_complaint_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_chinese_complaint_128 BertEmbeddings from xxr +author: John Snow Labs +name: bert_base_chinese_complaint_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_chinese_complaint_128` is an English model originally trained by xxr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_chinese_complaint_128_en_5.1.1_3.0_1694627957165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_chinese_complaint_128_en_5.1.1_3.0_1694627957165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_chinese_complaint_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_chinese_complaint_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_chinese_complaint_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/xxr/bert-base-chinese-complaint-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_dewiki_v1_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dewiki_v1_de.md new file mode 100644 index 00000000000000..ce40740b60e807 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dewiki_v1_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_dewiki_v1 BertEmbeddings from gwlms +author: John Snow Labs +name: bert_base_dewiki_v1 +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dewiki_v1` is a German model originally trained by gwlms. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dewiki_v1_de_5.1.1_3.0_1694623056420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dewiki_v1_de_5.1.1_3.0_1694623056420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dewiki_v1","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dewiki_v1", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dewiki_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|412.4 MB| + +## References + +https://huggingface.co/gwlms/bert-base-dewiki-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_en.md new file mode 100644 index 00000000000000..88af2281474189 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_dutch_cased BertEmbeddings from wietsedv +author: John Snow Labs +name: bert_base_dutch_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased` is an English model originally trained by wietsedv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_en_5.1.1_3.0_1694583992022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_en_5.1.1_3.0_1694583992022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/wietsedv/bert-base-dutch-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_finetuned_manx_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_finetuned_manx_en.md new file mode 100644 index 00000000000000..c1fd54a709286a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_finetuned_manx_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_dutch_cased_finetuned_manx BertEmbeddings from Pyjay +author: John Snow Labs +name: bert_base_dutch_cased_finetuned_manx +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_finetuned_manx` is an English model originally trained by Pyjay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_manx_en_5.1.1_3.0_1694569643622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_finetuned_manx_en_5.1.1_3.0_1694569643622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_dutch_cased_finetuned_manx","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_finetuned_manx", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_finetuned_manx| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Pyjay/bert-base-dutch-cased-finetuned-gv \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_mlm_visio_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_mlm_visio_en.md new file mode 100644 index 00000000000000..863fea572e1288 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_dutch_cased_mlm_visio_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_dutch_cased_mlm_visio BertEmbeddings from jegormeister +author: John Snow Labs +name: bert_base_dutch_cased_mlm_visio +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_dutch_cased_mlm_visio` is an English model originally trained by jegormeister. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_mlm_visio_en_5.1.1_3.0_1694598191567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_dutch_cased_mlm_visio_en_5.1.1_3.0_1694598191567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_dutch_cased_mlm_visio","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_dutch_cased_mlm_visio", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_dutch_cased_mlm_visio| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/jegormeister/bert-base-dutch-cased-mlm-visio \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finetuned_wellness_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finetuned_wellness_en.md new file mode 100644 index 00000000000000..c57a02843b5909 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finetuned_wellness_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_finetuned_wellness BertEmbeddings from gaeunseo +author: John Snow Labs +name: bert_base_finetuned_wellness +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_finetuned_wellness` is an English model originally trained by gaeunseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finetuned_wellness_en_5.1.1_3.0_1694583681541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finetuned_wellness_en_5.1.1_3.0_1694583681541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_finetuned_wellness","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finetuned_wellness", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finetuned_wellness| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/gaeunseo/bert-base-finetuned-wellness \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_cased_v1_fi.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_cased_v1_fi.md new file mode 100644 index 00000000000000..c3de96cbd33ceb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_cased_v1_fi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Finnish bert_base_finnish_cased_v1 BertEmbeddings from TurkuNLP +author: John Snow Labs +name: bert_base_finnish_cased_v1 +date: 2023-09-13 +tags: [bert, fi, open_source, fill_mask, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_finnish_cased_v1` is a Finnish model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finnish_cased_v1_fi_5.1.1_3.0_1694573404619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finnish_cased_v1_fi_5.1.1_3.0_1694573404619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_finnish_cased_v1","fi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finnish_cased_v1", "fi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finnish_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fi| +|Size:|464.7 MB| + +## References + +https://huggingface.co/TurkuNLP/bert-base-finnish-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_europeana_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_europeana_cased_en.md new file mode 100644 index 00000000000000..95edef13405007 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_europeana_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_finnish_europeana_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_finnish_europeana_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_finnish_europeana_cased` is an English model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finnish_europeana_cased_en_5.1.1_3.0_1694594988308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finnish_europeana_cased_en_5.1.1_3.0_1694594988308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_finnish_europeana_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finnish_europeana_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finnish_europeana_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.3 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-finnish-europeana-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_uncased_v1_fi.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_uncased_v1_fi.md new file mode 100644 index 00000000000000..902c15dd577caa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_finnish_uncased_v1_fi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Finnish bert_base_finnish_uncased_v1 BertEmbeddings from TurkuNLP +author: John Snow Labs +name: bert_base_finnish_uncased_v1 +date: 2023-09-13 +tags: [bert, fi, open_source, fill_mask, onnx] +task: Embeddings +language: fi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_finnish_uncased_v1` is a Finnish model originally trained by TurkuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_finnish_uncased_v1_fi_5.1.1_3.0_1694573578470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_finnish_uncased_v1_fi_5.1.1_3.0_1694573578470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_finnish_uncased_v1","fi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_finnish_uncased_v1", "fi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_finnish_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|fi| +|Size:|464.7 MB| + +## References + +https://huggingface.co/TurkuNLP/bert-base-finnish-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_frozen_generics_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_frozen_generics_mlm_en.md new file mode 100644 index 00000000000000..a619f0ecb1195a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_frozen_generics_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_frozen_generics_mlm BertEmbeddings from sello-ralethe +author: John Snow Labs +name: bert_base_frozen_generics_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_frozen_generics_mlm` is an English model originally trained by sello-ralethe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_frozen_generics_mlm_en_5.1.1_3.0_1694572672553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_frozen_generics_mlm_en_5.1.1_3.0_1694572672553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_frozen_generics_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_frozen_generics_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_frozen_generics_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/sello-ralethe/bert-base-frozen-generics-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_generics_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_generics_mlm_en.md new file mode 100644 index 00000000000000..58af567f01aee7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_generics_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_generics_mlm BertEmbeddings from sello-ralethe +author: John Snow Labs +name: bert_base_generics_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_generics_mlm` is an English model originally trained by sello-ralethe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_generics_mlm_en_5.1.1_3.0_1694572846320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_generics_mlm_en_5.1.1_3.0_1694572846320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_generics_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_generics_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_generics_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/sello-ralethe/bert-base-generics-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_archaeo_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_archaeo_de.md new file mode 100644 index 00000000000000..ee417e4188bab9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_archaeo_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_german_cased_archaeo BertEmbeddings from alexbrandsen +author: John Snow Labs +name: bert_base_german_cased_archaeo +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_cased_archaeo` is a German model originally trained by alexbrandsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_archaeo_de_5.1.1_3.0_1694608920923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_archaeo_de_5.1.1_3.0_1694608920923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_german_cased_archaeo","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_archaeo", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_archaeo| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/alexbrandsen/bert-base-german-cased-archaeo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_dbmdz_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_dbmdz_de.md new file mode 100644 index 00000000000000..347955729e9737 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_dbmdz_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_german_cased_dbmdz BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_german_cased_dbmdz +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_cased_dbmdz` is a German model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_dbmdz_de_5.1.1_3.0_1694595151979.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_dbmdz_de_5.1.1_3.0_1694595151979.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_german_cased_dbmdz","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_dbmdz", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_dbmdz| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|409.9 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-german-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_domain_adaptation_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_domain_adaptation_accelerate_en.md new file mode 100644 index 00000000000000..8f2dcd6db4c97a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_domain_adaptation_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_german_cased_domain_adaptation_accelerate BertEmbeddings from rodrigotuna +author: John Snow Labs +name: bert_base_german_cased_domain_adaptation_accelerate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_german_cased_domain_adaptation_accelerate` is an English model originally trained by rodrigotuna. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_domain_adaptation_accelerate_en_5.1.1_3.0_1694565556342.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_domain_adaptation_accelerate_en_5.1.1_3.0_1694565556342.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_german_cased_domain_adaptation_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_domain_adaptation_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_domain_adaptation_accelerate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/rodrigotuna/bert-base-german-cased-domain-adaptation-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_finetuned_swiss_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_finetuned_swiss_de.md new file mode 100644 index 00000000000000..568ac4a96b1c4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_finetuned_swiss_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_german_cased_finetuned_swiss BertEmbeddings from statworx +author: John Snow Labs +name: bert_base_german_cased_finetuned_swiss +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_cased_finetuned_swiss` is a German model originally trained by statworx. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_finetuned_swiss_de_5.1.1_3.0_1694635969690.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_finetuned_swiss_de_5.1.1_3.0_1694635969690.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_german_cased_finetuned_swiss","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_finetuned_swiss", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_finetuned_swiss| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/statworx/bert-base-german-cased-finetuned-swiss \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_issues_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_issues_128_en.md new file mode 100644 index 00000000000000..53132de0920d36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_issues_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_german_cased_issues_128 BertEmbeddings from ogimgio +author: John Snow Labs +name: bert_base_german_cased_issues_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_german_cased_issues_128` is an English model originally trained by ogimgio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_issues_128_en_5.1.1_3.0_1694595672332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_issues_128_en_5.1.1_3.0_1694595672332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_german_cased_issues_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_issues_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_issues_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ogimgio/bert-base-german-cased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_oldvocab_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_oldvocab_de.md new file mode 100644 index 00000000000000..4ba0dcbbacb836 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_german_cased_oldvocab_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_german_cased_oldvocab BertEmbeddings from deepset +author: John Snow Labs +name: bert_base_german_cased_oldvocab +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_german_cased_oldvocab` is a German model originally trained by deepset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_oldvocab_de_5.1.1_3.0_1694600841728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_oldvocab_de_5.1.1_3.0_1694600841728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_german_cased_oldvocab","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_oldvocab", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_oldvocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/deepset/bert-base-german-cased-oldvocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v1_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v1_finetuned_imdb_en.md new file mode 100644 index 00000000000000..df25665fc434e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v1_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_greek_uncased_v1_finetuned_imdb BertEmbeddings from snousias +author: John Snow Labs +name: bert_base_greek_uncased_v1_finetuned_imdb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v1_finetuned_imdb` is an English model originally trained by snousias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_finetuned_imdb_en_5.1.1_3.0_1694599489863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_finetuned_imdb_en_5.1.1_3.0_1694599489863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v1_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v1_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v1_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/snousias/bert-base-greek-uncased-v1-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v1_finetuned_polylex_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v1_finetuned_polylex_en.md new file mode 100644 index 00000000000000..9cea6160babbcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v1_finetuned_polylex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_greek_uncased_v1_finetuned_polylex BertEmbeddings from snousias +author: John Snow Labs +name: bert_base_greek_uncased_v1_finetuned_polylex +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v1_finetuned_polylex` is an English model originally trained by snousias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_finetuned_polylex_en_5.1.1_3.0_1694599881004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v1_finetuned_polylex_en_5.1.1_3.0_1694599881004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v1_finetuned_polylex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v1_finetuned_polylex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v1_finetuned_polylex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/snousias/bert-base-greek-uncased-v1-finetuned-polylex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v2_finetuned_polylex_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v2_finetuned_polylex_en.md new file mode 100644 index 00000000000000..b3aec13782eb31 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v2_finetuned_polylex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_greek_uncased_v2_finetuned_polylex BertEmbeddings from snousias +author: John Snow Labs +name: bert_base_greek_uncased_v2_finetuned_polylex +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v2_finetuned_polylex` is an English model originally trained by snousias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v2_finetuned_polylex_en_5.1.1_3.0_1694600262879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v2_finetuned_polylex_en_5.1.1_3.0_1694600262879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v2_finetuned_polylex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v2_finetuned_polylex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v2_finetuned_polylex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/snousias/bert-base-greek-uncased-v2-finetuned-polylex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v3_finetuned_polylex_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v3_finetuned_polylex_en.md new file mode 100644 index 00000000000000..eab4bc337bfed2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_greek_uncased_v3_finetuned_polylex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_greek_uncased_v3_finetuned_polylex BertEmbeddings from snousias +author: John Snow Labs +name: bert_base_greek_uncased_v3_finetuned_polylex +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v3_finetuned_polylex` is an English model originally trained by snousias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v3_finetuned_polylex_en_5.1.1_3.0_1694605011287.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v3_finetuned_polylex_en_5.1.1_3.0_1694605011287.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v3_finetuned_polylex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v3_finetuned_polylex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v3_finetuned_polylex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/snousias/bert-base-greek-uncased-v3-finetuned-polylex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_dutch_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_dutch_cased_en.md new file mode 100644 index 00000000000000..4b86243905530f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_dutch_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_historic_dutch_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_historic_dutch_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_historic_dutch_cased` is an English model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_historic_dutch_cased_en_5.1.1_3.0_1694595509546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_historic_dutch_cased_en_5.1.1_3.0_1694595509546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_historic_dutch_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_historic_dutch_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_historic_dutch_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-historic-dutch-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_english_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_english_cased_en.md new file mode 100644 index 00000000000000..427a4b95ed78f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_english_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_historic_english_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_historic_english_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_historic_english_cased` is an English model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_historic_english_cased_en_5.1.1_3.0_1694595905919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_historic_english_cased_en_5.1.1_3.0_1694595905919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_historic_english_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_historic_english_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_historic_english_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-historic-english-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_multilingual_64k_td_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_multilingual_64k_td_cased_xx.md new file mode 100644 index 00000000000000..8928e9d251c364 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_multilingual_64k_td_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_historic_multilingual_64k_td_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_historic_multilingual_64k_td_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_historic_multilingual_64k_td_cased` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_historic_multilingual_64k_td_cased_xx_5.1.1_3.0_1694618977228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_historic_multilingual_64k_td_cased_xx_5.1.1_3.0_1694618977228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_historic_multilingual_64k_td_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_historic_multilingual_64k_td_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_historic_multilingual_64k_td_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|504.6 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-historic-multilingual-64k-td-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_multilingual_cased_xx.md new file mode 100644 index 00000000000000..bccbc9b331cdaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historic_multilingual_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_historic_multilingual_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_historic_multilingual_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_historic_multilingual_cased` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_historic_multilingual_cased_xx_5.1.1_3.0_1694596352201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_historic_multilingual_cased_xx_5.1.1_3.0_1694596352201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_historic_multilingual_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_historic_multilingual_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_historic_multilingual_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|412.1 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-historic-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_historical_german_kinyarwanda_cased_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historical_german_kinyarwanda_cased_de.md new file mode 100644 index 00000000000000..54c81fefde4399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_historical_german_kinyarwanda_cased_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_historical_german_kinyarwanda_cased BertEmbeddings from redewiedergabe +author: John Snow Labs +name: bert_base_historical_german_kinyarwanda_cased +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_historical_german_kinyarwanda_cased` is a German model originally trained by redewiedergabe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_historical_german_kinyarwanda_cased_de_5.1.1_3.0_1694567053989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_historical_german_kinyarwanda_cased_de_5.1.1_3.0_1694567053989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_historical_german_kinyarwanda_cased","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_historical_german_kinyarwanda_cased", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_historical_german_kinyarwanda_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/redewiedergabe/bert-base-historical-german-rw-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_indonesian_1.5g_id.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_indonesian_1.5g_id.md new file mode 100644 index 00000000000000..93254d6d1cc62b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_indonesian_1.5g_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian bert_base_indonesian_1.5g BertEmbeddings from cahya +author: John Snow Labs +name: bert_base_indonesian_1.5g +date: 2023-09-13 +tags: [bert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_indonesian_1.5g` is an Indonesian model originally trained by cahya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_indonesian_1.5g_id_5.1.1_3.0_1694590366871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_indonesian_1.5g_id_5.1.1_3.0_1694590366871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_indonesian_1.5g","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_indonesian_1.5g", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_indonesian_1.5g| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|412.6 MB| + +## References + +https://huggingface.co/cahya/bert-base-indonesian-1.5G \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_indonesian_522m_id.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_indonesian_522m_id.md new file mode 100644 index 00000000000000..12c7e0e54371da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_indonesian_522m_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian bert_base_indonesian_522m BertEmbeddings from cahya +author: John Snow Labs +name: bert_base_indonesian_522m +date: 2023-09-13 +tags: [bert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_indonesian_522m` is an Indonesian model originally trained by cahya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_indonesian_522m_id_5.1.1_3.0_1694590524543.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_indonesian_522m_id_5.1.1_3.0_1694590524543.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_indonesian_522m","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_indonesian_522m", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_indonesian_522m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|412.5 MB| + +## References + +https://huggingface.co/cahya/bert-base-indonesian-522M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_dbmdz_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_dbmdz_it.md new file mode 100644 index 00000000000000..926f2019ac7794 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_dbmdz_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_cased_dbmdz BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_italian_cased_dbmdz +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_cased_dbmdz` is an Italian model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_cased_dbmdz_it_5.1.1_3.0_1694596864624.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_cased_dbmdz_it_5.1.1_3.0_1694596864624.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_cased_dbmdz","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_cased_dbmdz", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_cased_dbmdz| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.6 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_osiria_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_osiria_it.md new file mode 100644 index 00000000000000..d08ae6ea1fc640 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_cased_osiria_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_cased_osiria BertEmbeddings from osiria +author: John Snow Labs +name: bert_base_italian_cased_osiria +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_cased_osiria` is an Italian model originally trained by osiria. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_cased_osiria_it_5.1.1_3.0_1694568086773.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_cased_osiria_it_5.1.1_3.0_1694568086773.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_cased_osiria","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_cased_osiria", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_cased_osiria| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.0 MB| + +## References + +https://huggingface.co/osiria/bert-base-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_uncased_dbmdz_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_uncased_dbmdz_it.md new file mode 100644 index 00000000000000..f0206e9a3f2d72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_uncased_dbmdz_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_uncased_dbmdz BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_italian_uncased_dbmdz +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_uncased_dbmdz` is an Italian model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_uncased_dbmdz_it_5.1.1_3.0_1694597192371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_uncased_dbmdz_it_5.1.1_3.0_1694597192371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_uncased_dbmdz","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_uncased_dbmdz", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_uncased_dbmdz| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.7 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-italian-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_uncased_osiria_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_uncased_osiria_it.md new file mode 100644 index 00000000000000..96dfd316df2714 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_uncased_osiria_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_uncased_osiria BertEmbeddings from osiria +author: John Snow Labs +name: bert_base_italian_uncased_osiria +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_uncased_osiria` is an Italian model originally trained by osiria. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_uncased_osiria_it_5.1.1_3.0_1694576263884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_uncased_osiria_it_5.1.1_3.0_1694576263884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_uncased_osiria","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_uncased_osiria", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_uncased_osiria| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|407.1 MB| + +## References + +https://huggingface.co/osiria/bert-base-italian-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_xxl_cased_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_xxl_cased_it.md new file mode 100644 index 00000000000000..6dacfaa0825933 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_xxl_cased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_xxl_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_italian_xxl_cased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_xxl_cased` is an Italian model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_xxl_cased_it_5.1.1_3.0_1694597718333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_xxl_cased_it_5.1.1_3.0_1694597718333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_xxl_cased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_xxl_cased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_xxl_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|412.6 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-italian-xxl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_xxl_uncased_it.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_xxl_uncased_it.md new file mode 100644 index 00000000000000..2ddd032300c379 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_italian_xxl_uncased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bert_base_italian_xxl_uncased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_italian_xxl_uncased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_italian_xxl_uncased` is an Italian model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_italian_xxl_uncased_it_5.1.1_3.0_1694598081351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_italian_xxl_uncased_it_5.1.1_3.0_1694598081351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_italian_xxl_uncased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_italian_xxl_uncased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_italian_xxl_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|412.6 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-italian-xxl-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_japanese_ssuw_ja.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_japanese_ssuw_ja.md new file mode 100644 index 00000000000000..a423f277841279 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_japanese_ssuw_ja.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Japanese bert_base_japanese_ssuw BertEmbeddings from ku-accms +author: John Snow Labs +name: bert_base_japanese_ssuw +date: 2023-09-13 +tags: [bert, ja, open_source, fill_mask, onnx] +task: Embeddings +language: ja +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_japanese_ssuw` is a Japanese model originally trained by ku-accms. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_japanese_ssuw_ja_5.1.1_3.0_1694642519721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_japanese_ssuw_ja_5.1.1_3.0_1694642519721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_japanese_ssuw","ja") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_japanese_ssuw", "ja") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_japanese_ssuw| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ja| +|Size:|412.0 MB| + +## References + +https://huggingface.co/ku-accms/bert-base-japanese-ssuw \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_low_resource_wellness_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_low_resource_wellness_en.md new file mode 100644 index 00000000000000..606af3b8d46f10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_low_resource_wellness_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_low_resource_wellness BertEmbeddings from gaeunseo +author: John Snow Labs +name: bert_base_low_resource_wellness +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_low_resource_wellness` is an English model originally trained by gaeunseo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_low_resource_wellness_en_5.1.1_3.0_1694590425595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_low_resource_wellness_en_5.1.1_3.0_1694590425595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_low_resource_wellness","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_low_resource_wellness", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_low_resource_wellness| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/gaeunseo/bert-base-low_resource-wellness \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_bulgarian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_bulgarian_cased_en.md new file mode 100644 index 00000000000000..87202748969a34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_bulgarian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_macedonian_bulgarian_cased BertEmbeddings from anon-submission-mk +author: John Snow Labs +name: bert_base_macedonian_bulgarian_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_macedonian_bulgarian_cased` is an English model originally trained by anon-submission-mk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_bulgarian_cased_en_5.1.1_3.0_1694580092524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_bulgarian_cased_en_5.1.1_3.0_1694580092524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_macedonian_bulgarian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_macedonian_bulgarian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_macedonian_bulgarian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.1 MB| + +## References + +https://huggingface.co/anon-submission-mk/bert-base-macedonian-bulgarian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_cased_en.md new file mode 100644 index 00000000000000..915665f614518f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_macedonian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_macedonian_cased BertEmbeddings from anon-submission-mk +author: John Snow Labs +name: bert_base_macedonian_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_macedonian_cased` is an English model originally trained by anon-submission-mk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_cased_en_5.1.1_3.0_1694580230869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_macedonian_cased_en_5.1.1_3.0_1694580230869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_macedonian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_macedonian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_macedonian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/anon-submission-mk/bert-base-macedonian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_finetuned_am_shb_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_finetuned_am_shb_xx.md new file mode 100644 index 00000000000000..75b7edd1e0c4e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_finetuned_am_shb_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_finetuned_am_shb BertEmbeddings from am-shb +author: John Snow Labs +name: bert_base_multilingual_cased_finetuned_am_shb +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_finetuned_am_shb` is a Multilingual model originally trained by am-shb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_am_shb_xx_5.1.1_3.0_1694579098188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_am_shb_xx_5.1.1_3.0_1694579098188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_finetuned_am_shb","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_multilingual_cased_finetuned_am_shb", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_finetuned_am_shb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/am-shb/bert-base-multilingual-cased-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_finetuned_lener_breton_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_finetuned_lener_breton_xx.md new file mode 100644 index 00000000000000..40d3a6bee214e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_finetuned_lener_breton_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_finetuned_lener_breton BertEmbeddings from Luciano +author: John Snow Labs +name: bert_base_multilingual_cased_finetuned_lener_breton +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_finetuned_lener_breton` is a Multilingual model originally trained by Luciano. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_lener_breton_xx_5.1.1_3.0_1694617850537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_finetuned_lener_breton_xx_5.1.1_3.0_1694617850537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_finetuned_lener_breton","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_multilingual_cased_finetuned_lener_breton", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_finetuned_lener_breton| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.1 MB| + +## References + +https://huggingface.co/Luciano/bert-base-multilingual-cased-finetuned-lener_br \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_iwslt14deen_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_iwslt14deen_xx.md new file mode 100644 index 00000000000000..5609e17478e5a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_iwslt14deen_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_iwslt14deen BertEmbeddings from miugod +author: John Snow Labs +name: bert_base_multilingual_cased_iwslt14deen +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_iwslt14deen` is a Multilingual model originally trained by miugod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_iwslt14deen_xx_5.1.1_3.0_1694598191715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_iwslt14deen_xx_5.1.1_3.0_1694598191715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_iwslt14deen","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_multilingual_cased_iwslt14deen", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_iwslt14deen| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|664.2 MB| + +## References + +https://huggingface.co/miugod/bert-base-multilingual-cased-iwslt14deen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_urgency_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_urgency_xx.md new file mode 100644 index 00000000000000..54f481e3105cb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_cased_urgency_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_cased_urgency BertEmbeddings from Narshion +author: John Snow Labs +name: bert_base_multilingual_cased_urgency +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_cased_urgency` is a Multilingual model originally trained by Narshion. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_urgency_xx_5.1.1_3.0_1694568412458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_cased_urgency_xx_5.1.1_3.0_1694568412458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_cased_urgency","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_multilingual_cased_urgency", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_cased_urgency| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|662.2 MB| + +## References + +https://huggingface.co/Narshion/bert-base-multilingual-cased-urgency \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_uncased_finetuned_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_uncased_finetuned_xx.md new file mode 100644 index 00000000000000..965f449592bc25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_uncased_finetuned_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_uncased_finetuned BertEmbeddings from am-shb +author: John Snow Labs +name: bert_base_multilingual_uncased_finetuned +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_uncased_finetuned` is a Multilingual model originally trained by am-shb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_finetuned_xx_5.1.1_3.0_1694579287846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_finetuned_xx_5.1.1_3.0_1694579287846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_uncased_finetuned","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_multilingual_uncased_finetuned", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_uncased_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|625.5 MB| + +## References + +https://huggingface.co/am-shb/bert-base-multilingual-uncased-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_uncased_pretrained_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_uncased_pretrained_xx.md new file mode 100644 index 00000000000000..925cbcc12df313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_multilingual_uncased_pretrained_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_base_multilingual_uncased_pretrained BertEmbeddings from am-shb +author: John Snow Labs +name: bert_base_multilingual_uncased_pretrained +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_multilingual_uncased_pretrained` is a Multilingual model originally trained by am-shb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_pretrained_xx_5.1.1_3.0_1694579516141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_multilingual_uncased_pretrained_xx_5.1.1_3.0_1694579516141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_multilingual_uncased_pretrained","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_multilingual_uncased_pretrained", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_multilingual_uncased_pretrained| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|625.5 MB| + +## References + +https://huggingface.co/am-shb/bert-base-multilingual-uncased-pretrained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_en.md new file mode 100644 index 00000000000000..ee2fc4b398a3b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_nli BertEmbeddings from binwang +author: John Snow Labs +name: bert_base_nli +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_nli` is an English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_nli_en_5.1.1_3.0_1694587924757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_nli_en_5.1.1_3.0_1694587924757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_nli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_nli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_nli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/binwang/bert-base-nli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_stsb_en.md new file mode 100644 index 00000000000000..10976f99b7af41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_nli_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_nli_stsb BertEmbeddings from binwang +author: John Snow Labs +name: bert_base_nli_stsb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_nli_stsb` is an English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_nli_stsb_en_5.1.1_3.0_1694587761118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_nli_stsb_en_5.1.1_3.0_1694587761118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_nli_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_nli_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_nli_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/binwang/bert-base-nli-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_pashto_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_pashto_en.md new file mode 100644 index 00000000000000..79e6809ee1b7b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_pashto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_pashto BertEmbeddings from ijazulhaq +author: John Snow Labs +name: bert_base_pashto +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_pashto` is an English model originally trained by ijazulhaq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_pashto_en_5.1.1_3.0_1694628431394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_pashto_en_5.1.1_3.0_1694628431394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_pashto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_pashto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_pashto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.4 MB| + +## References + +https://huggingface.co/ijazulhaq/bert-base-pashto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_pashto_v1_ps.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_pashto_v1_ps.md new file mode 100644 index 00000000000000..b618258ebd17d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_pashto_v1_ps.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Pashto, Pushto bert_base_pashto_v1 BertEmbeddings from ijazulhaq +author: John Snow Labs +name: bert_base_pashto_v1 +date: 2023-09-13 +tags: [bert, ps, open_source, fill_mask, onnx] +task: Embeddings +language: ps +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_pashto_v1` is a Pashto, Pushto model originally trained by ijazulhaq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_pashto_v1_ps_5.1.1_3.0_1694648224326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_pashto_v1_ps_5.1.1_3.0_1694648224326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_pashto_v1","ps") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_pashto_v1", "ps") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_pashto_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ps| +|Size:|406.3 MB| + +## References + +https://huggingface.co/ijazulhaq/bert-base-pashto-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_persian_sport_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_persian_sport_bert_uncased_en.md new file mode 100644 index 00000000000000..25f4faa459ab63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_persian_sport_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_persian_sport_bert_uncased BertEmbeddings from montazeri +author: John Snow Labs +name: bert_base_persian_sport_bert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_persian_sport_bert_uncased` is an English model originally trained by montazeri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_persian_sport_bert_uncased_en_5.1.1_3.0_1694564033173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_persian_sport_bert_uncased_en_5.1.1_3.0_1694564033173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_persian_sport_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_persian_sport_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_persian_sport_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.0 MB| + +## References + +https://huggingface.co/montazeri/bert-base-persian-sport-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_polish_uncased_v1_pl.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_polish_uncased_v1_pl.md new file mode 100644 index 00000000000000..8f0551a218592c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_polish_uncased_v1_pl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Polish bert_base_polish_uncased_v1 BertEmbeddings from dkleczek +author: John Snow Labs +name: bert_base_polish_uncased_v1 +date: 2023-09-13 +tags: [bert, pl, open_source, fill_mask, onnx] +task: Embeddings +language: pl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_polish_uncased_v1` is a Polish model originally trained by dkleczek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_polish_uncased_v1_pl_5.1.1_3.0_1694625516398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_polish_uncased_v1_pl_5.1.1_3.0_1694625516398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_polish_uncased_v1","pl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_polish_uncased_v1", "pl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_polish_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pl| +|Size:|493.5 MB| + +## References + +https://huggingface.co/dkleczek/bert-base-polish-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_chico_xavier_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_chico_xavier_en.md new file mode 100644 index 00000000000000..dbc92962015805 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_chico_xavier_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_portuguese_cased_finetuned_chico_xavier BertEmbeddings from gabrielgmendonca +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_chico_xavier +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_portuguese_cased_finetuned_chico_xavier` is an English model originally trained by gabrielgmendonca.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_chico_xavier_en_5.1.1_3.0_1694587483813.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_chico_xavier_en_5.1.1_3.0_1694587483813.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_chico_xavier","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_chico_xavier", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_chico_xavier| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/gabrielgmendonca/bert-base-portuguese-cased-finetuned-chico-xavier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_enjoei_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_enjoei_en.md new file mode 100644 index 00000000000000..2fe536e596bd4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_enjoei_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_portuguese_cased_finetuned_enjoei BertEmbeddings from gabrielgmendonca +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_enjoei +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_portuguese_cased_finetuned_enjoei` is an English model originally trained by gabrielgmendonca.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_enjoei_en_5.1.1_3.0_1694618398309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_enjoei_en_5.1.1_3.0_1694618398309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_enjoei","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_enjoei", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_enjoei| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/gabrielgmendonca/bert-base-portuguese-cased-finetuned-enjoei \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_peticoes_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_peticoes_pt.md new file mode 100644 index 00000000000000..d641307dc0233e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_peticoes_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_portuguese_cased_finetuned_peticoes BertEmbeddings from Luciano +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_peticoes +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_portuguese_cased_finetuned_peticoes` is a Portuguese model originally trained by Luciano. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_peticoes_pt_5.1.1_3.0_1694566319648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_peticoes_pt_5.1.1_3.0_1694566319648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_peticoes","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_peticoes", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_peticoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/Luciano/bert-base-portuguese-cased-finetuned-peticoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_tcu_acordaos_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_tcu_acordaos_pt.md new file mode 100644 index 00000000000000..59ef6cbf3b777a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_portuguese_cased_finetuned_tcu_acordaos_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_base_portuguese_cased_finetuned_tcu_acordaos BertEmbeddings from Luciano +author: John Snow Labs +name: bert_base_portuguese_cased_finetuned_tcu_acordaos +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_portuguese_cased_finetuned_tcu_acordaos` is a Portuguese model originally trained by Luciano. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_tcu_acordaos_pt_5.1.1_3.0_1694566507028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_portuguese_cased_finetuned_tcu_acordaos_pt_5.1.1_3.0_1694566507028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_portuguese_cased_finetuned_tcu_acordaos","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_portuguese_cased_finetuned_tcu_acordaos", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_portuguese_cased_finetuned_tcu_acordaos| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/Luciano/bert-base-portuguese-cased-finetuned-tcu-acordaos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1790k_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1790k_ar.md new file mode 100644 index 00000000000000..12aa435d15dd47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1790k_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib60_1790k BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib60_1790k +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib60_1790k` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1790k_ar_5.1.1_3.0_1694565184223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1790k_ar_5.1.1_3.0_1694565184223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_qarib60_1790k","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib60_1790k", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib60_1790k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib60_1790k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1970k_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1970k_ar.md new file mode 100644 index 00000000000000..c10604c566e8e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_1970k_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib60_1970k BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib60_1970k +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib60_1970k` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1970k_ar_5.1.1_3.0_1694565365666.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_1970k_ar_5.1.1_3.0_1694565365666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_qarib60_1970k","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib60_1970k", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib60_1970k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib60_1970k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_860k_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_860k_ar.md new file mode 100644 index 00000000000000..d2ee1a6aae964d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib60_860k_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib60_860k BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib60_860k +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib60_860k` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_860k_ar_5.1.1_3.0_1694565528171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib60_860k_ar_5.1.1_3.0_1694565528171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_qarib60_860k","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib60_860k", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib60_860k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.9 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib60_860k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib_ar.md new file mode 100644 index 00000000000000..0692ef43688be0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_qarib_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_base_qarib BertEmbeddings from qarib +author: John Snow Labs +name: bert_base_qarib +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_qarib` is an Arabic model originally trained by qarib. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_qarib_ar_5.1.1_3.0_1694565010110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_qarib_ar_5.1.1_3.0_1694565010110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_qarib","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_qarib", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_qarib| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.0 MB| + +## References + +https://huggingface.co/qarib/bert-base-qarib \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_romanian_cased_v1_ro.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_romanian_cased_v1_ro.md new file mode 100644 index 00000000000000..e92c70cd72616d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_romanian_cased_v1_ro.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Moldavian, Moldovan, Romanian bert_base_romanian_cased_v1 BertEmbeddings from dumitrescustefan +author: John Snow Labs +name: bert_base_romanian_cased_v1 +date: 2023-09-13 +tags: [bert, ro, open_source, fill_mask, onnx] +task: Embeddings +language: ro +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_romanian_cased_v1` is a Moldavian, Moldovan, Romanian model originally trained by dumitrescustefan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_romanian_cased_v1_ro_5.1.1_3.0_1694627761142.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_romanian_cased_v1_ro_5.1.1_3.0_1694627761142.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_romanian_cased_v1","ro") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_romanian_cased_v1", "ro") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_romanian_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ro| +|Size:|464.0 MB| + +## References + +https://huggingface.co/dumitrescustefan/bert-base-romanian-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_romanian_uncased_v1_ro.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_romanian_uncased_v1_ro.md new file mode 100644 index 00000000000000..342db426389a75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_romanian_uncased_v1_ro.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Moldavian, Moldovan, Romanian bert_base_romanian_uncased_v1 BertEmbeddings from dumitrescustefan +author: John Snow Labs +name: bert_base_romanian_uncased_v1 +date: 2023-09-13 +tags: [bert, ro, open_source, fill_mask, onnx] +task: Embeddings +language: ro +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_romanian_uncased_v1` is a Moldavian, Moldovan, Romanian model originally trained by dumitrescustefan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_romanian_uncased_v1_ro_5.1.1_3.0_1694628307563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_romanian_uncased_v1_ro_5.1.1_3.0_1694628307563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_romanian_uncased_v1","ro") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_romanian_uncased_v1", "ro") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_romanian_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ro| +|Size:|464.4 MB| + +## References + +https://huggingface.co/dumitrescustefan/bert-base-romanian-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_amvv_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_amvv_uncased_en.md new file mode 100644 index 00000000000000..da6672cd276449 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_amvv_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_amvv_uncased BertEmbeddings from amvargasv +author: John Snow Labs +name: bert_base_spanish_amvv_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_amvv_uncased` is an English model originally trained by amvargasv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_amvv_uncased_en_5.1.1_3.0_1694647970479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_amvv_uncased_en_5.1.1_3.0_1694647970479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_amvv_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_amvv_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_amvv_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/amvargasv/bert-base-spanish-amvv-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_dccuchile_es.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_dccuchile_es.md new file mode 100644 index 00000000000000..7c551b16413123 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_dccuchile_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish bert_base_spanish_wwm_cased_dccuchile BertEmbeddings from dccuchile +author: John Snow Labs +name: bert_base_spanish_wwm_cased_dccuchile +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_spanish_wwm_cased_dccuchile` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_dccuchile_es_5.1.1_3.0_1694600005028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_dccuchile_es_5.1.1_3.0_1694600005028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_dccuchile","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_dccuchile", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_dccuchile| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/dccuchile/bert-base-spanish-wwm-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_literature_pro_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_literature_pro_en.md new file mode 100644 index 00000000000000..0cc9b509d799fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_literature_pro_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_wwm_cased_finetuned_literature_pro BertEmbeddings from a-v-bely +author: John Snow Labs +name: bert_base_spanish_wwm_cased_finetuned_literature_pro +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_wwm_cased_finetuned_literature_pro` is an English model originally trained by a-v-bely.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_literature_pro_en_5.1.1_3.0_1694586382623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_literature_pro_en_5.1.1_3.0_1694586382623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_finetuned_literature_pro","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_finetuned_literature_pro", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_finetuned_literature_pro| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/a-v-bely/bert-base-spanish-wwm-cased-finetuned-literature-pro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_tweets_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_tweets_en.md new file mode 100644 index 00000000000000..bc43e9ef6b52f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_tweets_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_wwm_cased_finetuned_tweets BertEmbeddings from myahan007 +author: John Snow Labs +name: bert_base_spanish_wwm_cased_finetuned_tweets +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_wwm_cased_finetuned_tweets` is an English model originally trained by myahan007.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_tweets_en_5.1.1_3.0_1694588752414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_tweets_en_5.1.1_3.0_1694588752414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_finetuned_tweets","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_finetuned_tweets", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_finetuned_tweets| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/myahan007/bert-base-spanish-wwm-cased-finetuned-tweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_tweets_es.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_tweets_es.md new file mode 100644 index 00000000000000..78f8e86f7a7aed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_finetuned_tweets_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish bert_base_spanish_wwm_cased_finetuned_tweets BertEmbeddings from mariav +author: John Snow Labs +name: bert_base_spanish_wwm_cased_finetuned_tweets +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_spanish_wwm_cased_finetuned_tweets` is a Castilian, Spanish model originally trained by mariav. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_tweets_es_5.1.1_3.0_1694586672799.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_tweets_es_5.1.1_3.0_1694586672799.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_finetuned_tweets","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_finetuned_tweets", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_finetuned_tweets| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.4 MB| + +## References + +https://huggingface.co/mariav/bert-base-spanish-wwm-cased-finetuned-tweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_plai_edp_test_es.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_plai_edp_test_es.md new file mode 100644 index 00000000000000..dc85095149dca8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_cased_plai_edp_test_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish bert_base_spanish_wwm_cased_plai_edp_test BertEmbeddings from plai-edp-test +author: John Snow Labs +name: bert_base_spanish_wwm_cased_plai_edp_test +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_spanish_wwm_cased_plai_edp_test` is a Castilian, Spanish model originally trained by plai-edp-test. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_plai_edp_test_es_5.1.1_3.0_1694572546577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_plai_edp_test_es_5.1.1_3.0_1694572546577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_plai_edp_test","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_plai_edp_test", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_plai_edp_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/plai-edp-test/bert_base_spanish_wwm_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_uncased_es.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_uncased_es.md new file mode 100644 index 00000000000000..e4a6622f220fdb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_uncased_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish bert_base_spanish_wwm_uncased BertEmbeddings from dccuchile +author: John Snow Labs +name: bert_base_spanish_wwm_uncased +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_spanish_wwm_uncased` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_uncased_es_5.1.1_3.0_1694600405744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_uncased_es_5.1.1_3.0_1694600405744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_uncased","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_uncased", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/dccuchile/bert-base-spanish-wwm-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_uncased_finetuned_imdb_spanish_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_uncased_finetuned_imdb_spanish_en.md new file mode 100644 index 00000000000000..6262eb8343343f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_spanish_wwm_uncased_finetuned_imdb_spanish_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_wwm_uncased_finetuned_imdb_spanish BertEmbeddings from GabrielDGC +author: John Snow Labs +name: bert_base_spanish_wwm_uncased_finetuned_imdb_spanish +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_wwm_uncased_finetuned_imdb_spanish` is an English model originally trained by GabrielDGC.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_uncased_finetuned_imdb_spanish_en_5.1.1_3.0_1694578377936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_uncased_finetuned_imdb_spanish_en_5.1.1_3.0_1694578377936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_uncased_finetuned_imdb_spanish","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_uncased_finetuned_imdb_spanish", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_uncased_finetuned_imdb_spanish| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/GabrielDGC/bert-base-spanish-wwm-uncased-finetuned-imdb-spanish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_stackoverflow_comments_1m_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_stackoverflow_comments_1m_en.md new file mode 100644 index 00000000000000..67b2321f93e974 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_stackoverflow_comments_1m_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_stackoverflow_comments_1m BertEmbeddings from giganticode +author: John Snow Labs +name: bert_base_stackoverflow_comments_1m +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_stackoverflow_comments_1m` is an English model originally trained by giganticode. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_stackoverflow_comments_1m_en_5.1.1_3.0_1694648985744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_stackoverflow_comments_1m_en_5.1.1_3.0_1694648985744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_stackoverflow_comments_1m","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_stackoverflow_comments_1m", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_stackoverflow_comments_1m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/giganticode/bert-base-StackOverflow-comments_1M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_stackoverflow_comments_2m_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_stackoverflow_comments_2m_en.md new file mode 100644 index 00000000000000..083c6b57462ac3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_stackoverflow_comments_2m_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_stackoverflow_comments_2m BertEmbeddings from giganticode +author: John Snow Labs +name: bert_base_stackoverflow_comments_2m +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_stackoverflow_comments_2m` is an English model originally trained by giganticode. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_stackoverflow_comments_2m_en_5.1.1_3.0_1694649245162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_stackoverflow_comments_2m_en_5.1.1_3.0_1694649245162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_stackoverflow_comments_2m","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_stackoverflow_comments_2m", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_stackoverflow_comments_2m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/giganticode/bert-base-StackOverflow-comments_2M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_standard_bahasa_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_standard_bahasa_cased_en.md new file mode 100644 index 00000000000000..9429486248ee7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_standard_bahasa_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_standard_bahasa_cased BertEmbeddings from mesolitica +author: John Snow Labs +name: bert_base_standard_bahasa_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_standard_bahasa_cased` is an English model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_standard_bahasa_cased_en_5.1.1_3.0_1694586245028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_standard_bahasa_cased_en_5.1.1_3.0_1694586245028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_standard_bahasa_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_standard_bahasa_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_standard_bahasa_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/mesolitica/bert-base-standard-bahasa-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_alpha_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_alpha_en.md new file mode 100644 index 00000000000000..ab636f8c757459 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_alpha_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_swedish_cased_alpha BertEmbeddings from KBLab +author: John Snow Labs +name: bert_base_swedish_cased_alpha +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_swedish_cased_alpha` is an English model originally trained by KBLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_alpha_en_5.1.1_3.0_1694563762856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_alpha_en_5.1.1_3.0_1694563762856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_alpha","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_cased_alpha", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_alpha| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/KBLab/bert-base-swedish-cased-alpha \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_kblab_sv.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_kblab_sv.md new file mode 100644 index 00000000000000..b4368b621b30b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_cased_kblab_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish bert_base_swedish_cased_kblab BertEmbeddings from KBLab +author: John Snow Labs +name: bert_base_swedish_cased_kblab +date: 2023-09-13 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_swedish_cased_kblab` is a Swedish model originally trained by KBLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kblab_sv_5.1.1_3.0_1694563899305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_cased_kblab_sv_5.1.1_3.0_1694563899305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_cased_kblab","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_cased_kblab", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_cased_kblab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|465.2 MB| + +## References + +https://huggingface.co/KBLab/bert-base-swedish-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_europeana_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_europeana_cased_en.md new file mode 100644 index 00000000000000..f5ea2258f15131 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_europeana_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_swedish_europeana_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_swedish_europeana_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_swedish_europeana_cased` is an English model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_europeana_cased_en_5.1.1_3.0_1694598474238.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_europeana_cased_en_5.1.1_3.0_1694598474238.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_europeana_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_europeana_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_europeana_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.9 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-swedish-europeana-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_uncased_en.md new file mode 100644 index 00000000000000..84c35b4ed9b18c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_swedish_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_swedish_uncased BertEmbeddings from af-ai-center +author: John Snow Labs +name: bert_base_swedish_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_swedish_uncased` is an English model originally trained by af-ai-center. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_swedish_uncased_en_5.1.1_3.0_1694576951355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_swedish_uncased_en_5.1.1_3.0_1694576951355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_swedish_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_swedish_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_swedish_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/af-ai-center/bert-base-swedish-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_test_en.md new file mode 100644 index 00000000000000..90977abfd25993 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_test BertEmbeddings from Exqrch +author: John Snow Labs +name: bert_base_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_test` is an English model originally trained by Exqrch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_test_en_5.1.1_3.0_1694593295884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_test_en_5.1.1_3.0_1694593295884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.6 MB| + +## References + +https://huggingface.co/Exqrch/bert-base-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_token_dropping_dewiki_v1_de.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_token_dropping_dewiki_v1_de.md new file mode 100644 index 00000000000000..d598a22adbfa34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_token_dropping_dewiki_v1_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German bert_base_token_dropping_dewiki_v1 BertEmbeddings from gwlms +author: John Snow Labs +name: bert_base_token_dropping_dewiki_v1 +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_token_dropping_dewiki_v1` is a German model originally trained by gwlms. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_token_dropping_dewiki_v1_de_5.1.1_3.0_1694624494518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_token_dropping_dewiki_v1_de_5.1.1_3.0_1694624494518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_token_dropping_dewiki_v1","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_token_dropping_dewiki_v1", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_token_dropping_dewiki_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|412.4 MB| + +## References + +https://huggingface.co/gwlms/bert-base-token-dropping-dewiki-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_128k_cased_offensive_mlm_tr.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_128k_cased_offensive_mlm_tr.md new file mode 100644 index 00000000000000..11bb47a13d09b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_128k_cased_offensive_mlm_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish bert_base_turkish_128k_cased_offensive_mlm BertEmbeddings from Overfit-GM +author: John Snow Labs +name: bert_base_turkish_128k_cased_offensive_mlm +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_turkish_128k_cased_offensive_mlm` is a Turkish model originally trained by Overfit-GM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_128k_cased_offensive_mlm_tr_5.1.1_3.0_1694606938576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_128k_cased_offensive_mlm_tr_5.1.1_3.0_1694606938576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_turkish_128k_cased_offensive_mlm","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_turkish_128k_cased_offensive_mlm", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_128k_cased_offensive_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|688.9 MB| + +## References + +https://huggingface.co/Overfit-GM/bert-base-turkish-128k-cased-offensive-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_128k_uncased_offensive_mlm_tr.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_128k_uncased_offensive_mlm_tr.md new file mode 100644 index 00000000000000..499de2f13074a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_128k_uncased_offensive_mlm_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish bert_base_turkish_128k_uncased_offensive_mlm BertEmbeddings from Overfit-GM +author: John Snow Labs +name: bert_base_turkish_128k_uncased_offensive_mlm +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_turkish_128k_uncased_offensive_mlm` is a Turkish model originally trained by Overfit-GM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_128k_uncased_offensive_mlm_tr_5.1.1_3.0_1694607873975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_128k_uncased_offensive_mlm_tr_5.1.1_3.0_1694607873975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_turkish_128k_uncased_offensive_mlm","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_turkish_128k_uncased_offensive_mlm", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_128k_uncased_offensive_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|689.4 MB| + +## References + +https://huggingface.co/Overfit-GM/bert-base-turkish-128k-uncased-offensive-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_cased_offensive_mlm_tr.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_cased_offensive_mlm_tr.md new file mode 100644 index 00000000000000..c0763e2b347050 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_cased_offensive_mlm_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish bert_base_turkish_cased_offensive_mlm BertEmbeddings from Overfit-GM +author: John Snow Labs +name: bert_base_turkish_cased_offensive_mlm +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_turkish_cased_offensive_mlm` is a Turkish model originally trained by Overfit-GM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_cased_offensive_mlm_tr_5.1.1_3.0_1694608252958.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_cased_offensive_mlm_tr_5.1.1_3.0_1694608252958.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_turkish_cased_offensive_mlm","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_turkish_cased_offensive_mlm", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_cased_offensive_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|412.3 MB| + +## References + +https://huggingface.co/Overfit-GM/bert-base-turkish-cased-offensive-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_uncased_offensive_mlm_tr.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_uncased_offensive_mlm_tr.md new file mode 100644 index 00000000000000..b0f6e5c7475c82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_turkish_uncased_offensive_mlm_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish bert_base_turkish_uncased_offensive_mlm BertEmbeddings from Overfit-GM +author: John Snow Labs +name: bert_base_turkish_uncased_offensive_mlm +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_turkish_uncased_offensive_mlm` is a Turkish model originally trained by Overfit-GM. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_turkish_uncased_offensive_mlm_tr_5.1.1_3.0_1694609304508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_turkish_uncased_offensive_mlm_tr_5.1.1_3.0_1694609304508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_turkish_uncased_offensive_mlm","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_turkish_uncased_offensive_mlm", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_turkish_uncased_offensive_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|412.5 MB| + +## References + +https://huggingface.co/Overfit-GM/bert-base-turkish-uncased-offensive-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_1_en.md new file mode 100644 index 00000000000000..5c6c6bdf288220 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_1 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_1` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_1_en_5.1.1_3.0_1694566037618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_1_en_5.1.1_3.0_1694566037618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_2_en.md new file mode 100644 index 00000000000000..3cd591e38fdee4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_2 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_2` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_2_en_5.1.1_3.0_1694569462383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_2_en_5.1.1_3.0_1694569462383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_3_en.md new file mode 100644 index 00000000000000..218d2f78c5da77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_3 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_3` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_3_en_5.1.1_3.0_1694569751143.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_3_en_5.1.1_3.0_1694569751143.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// Assembles "text" into "documents" and computes BERT embeddings; "data" and the "token" column are not created here. + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_4_en.md new file mode 100644 index 00000000000000..e5f8c8ef0bcff7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_4 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_4` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_4_en_5.1.1_3.0_1694572974212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_4_en_5.1.1_3.0_1694572974212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_5_en.md new file mode 100644 index 00000000000000..4a619767e6925b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_5 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_5` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_5_en_5.1.1_3.0_1694574203077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_5_en_5.1.1_3.0_1694574203077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.0 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_6_en.md new file mode 100644 index 00000000000000..1e515cfb8302f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_habana_test_6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_habana_test_6 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_habana_test_6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_habana_test_6` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_6_en_5.1.1_3.0_1694574566491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_habana_test_6_en_5.1.1_3.0_1694574566491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_2022_habana_test_6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_habana_test_6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_habana_test_6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.2 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-habana-test-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_nvidia_test_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_nvidia_test_2_en.md new file mode 100644 index 00000000000000..43f03aba5e7704 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_nvidia_test_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_nvidia_test_2 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_nvidia_test_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_nvidia_test_2` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_nvidia_test_2_en_5.1.1_3.0_1694571908191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_nvidia_test_2_en_5.1.1_3.0_1694571908191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_2022_nvidia_test_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_nvidia_test_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_nvidia_test_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.1 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-nvidia-test-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_nvidia_test_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_nvidia_test_3_en.md new file mode 100644 index 00000000000000..8e27228b1e0818 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2022_nvidia_test_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2022_nvidia_test_3 BertEmbeddings from philschmid +author: John Snow Labs +name: bert_base_uncased_2022_nvidia_test_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2022_nvidia_test_3` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_nvidia_test_3_en_5.1.1_3.0_1694569617047.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2022_nvidia_test_3_en_5.1.1_3.0_1694569617047.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_2022_nvidia_test_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2022_nvidia_test_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2022_nvidia_test_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.1 MB| + +## References + +https://huggingface.co/philschmid/bert-base-uncased-2022-nvidia-test-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2_finetuned_rramicus_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2_finetuned_rramicus_en.md new file mode 100644 index 00000000000000..de88314f4642ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_2_finetuned_rramicus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_2_finetuned_rramicus BertEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: bert_base_uncased_2_finetuned_rramicus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_2_finetuned_rramicus` is an English model originally trained by repro-rights-amicus-briefs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2_finetuned_rramicus_en_5.1.1_3.0_1694636800409.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_2_finetuned_rramicus_en_5.1.1_3.0_1694636800409.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_2_finetuned_rramicus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_2_finetuned_rramicus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_2_finetuned_rramicus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/bert-base-uncased-2-finetuned-RRamicus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v1_en.md new file mode 100644 index 00000000000000..89129e45c8ba2b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_ancient_greek_v1 BertEmbeddings from Sonnenblume +author: John Snow Labs +name: bert_base_uncased_ancient_greek_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_ancient_greek_v1` is an English model originally trained by Sonnenblume. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ancient_greek_v1_en_5.1.1_3.0_1694616781198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ancient_greek_v1_en_5.1.1_3.0_1694616781198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_ancient_greek_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_ancient_greek_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ancient_greek_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.0 MB| + +## References + +https://huggingface.co/Sonnenblume/bert-base-uncased-ancient-greek-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v3_en.md new file mode 100644 index 00000000000000..19179b1becd45f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_ancient_greek_v3 BertEmbeddings from Sonnenblume +author: John Snow Labs +name: bert_base_uncased_ancient_greek_v3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_ancient_greek_v3` is an English model originally trained by Sonnenblume. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ancient_greek_v3_en_5.1.1_3.0_1694617164732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ancient_greek_v3_en_5.1.1_3.0_1694617164732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_ancient_greek_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_ancient_greek_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ancient_greek_v3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.0 MB| + +## References + +https://huggingface.co/Sonnenblume/bert-base-uncased-ancient-greek-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v4_en.md new file mode 100644 index 00000000000000..01260c6d80d789 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ancient_greek_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_ancient_greek_v4 BertEmbeddings from Sonnenblume +author: John Snow Labs +name: bert_base_uncased_ancient_greek_v4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_ancient_greek_v4` is an English model originally trained by Sonnenblume. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ancient_greek_v4_en_5.1.1_3.0_1694636309967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ancient_greek_v4_en_5.1.1_3.0_1694636309967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_ancient_greek_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_ancient_greek_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ancient_greek_v4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.0 MB| + +## References + +https://huggingface.co/Sonnenblume/bert-base-uncased-ancient-greek-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_en.md new file mode 100644 index 00000000000000..02279fa9c971ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_bert_mask_complete_word BertEmbeddings from keshavG +author: John Snow Labs +name: bert_base_uncased_bert_mask_complete_word +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_bert_mask_complete_word` is an English model originally trained by keshavG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_en_5.1.1_3.0_1694578511284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_en_5.1.1_3.0_1694578511284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_bert_mask_complete_word","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_bert_mask_complete_word", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_bert_mask_complete_word| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/keshavG/bert-base-uncased-bert_mask_complete_word \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_updated_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_updated_vocab_en.md new file mode 100644 index 00000000000000..c5125a4ea23f0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_bert_mask_complete_word_updated_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_bert_mask_complete_word_updated_vocab BertEmbeddings from keshavG +author: John Snow Labs +name: bert_base_uncased_bert_mask_complete_word_updated_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_bert_mask_complete_word_updated_vocab` is an English model originally trained by keshavG. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_updated_vocab_en_5.1.1_3.0_1694585644846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_bert_mask_complete_word_updated_vocab_en_5.1.1_3.0_1694585644846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_bert_mask_complete_word_updated_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_bert_mask_complete_word_updated_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_bert_mask_complete_word_updated_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.5 MB| + +## References + +https://huggingface.co/keshavG/bert-base-uncased-bert_mask_complete_word_updated_vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_binwang_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_binwang_en.md new file mode 100644 index 00000000000000..4a2802f534a14f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_binwang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_binwang BertEmbeddings from binwang +author: John Snow Labs +name: bert_base_uncased_binwang +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_binwang` is an English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_binwang_en_5.1.1_3.0_1694588138775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_binwang_en_5.1.1_3.0_1694588138775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_binwang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_binwang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_binwang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/binwang/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_byeongal_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_byeongal_en.md new file mode 100644 index 00000000000000..36d8b118330731 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_byeongal_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_byeongal BertEmbeddings from byeongal +author: John Snow Labs +name: bert_base_uncased_byeongal +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_byeongal` is an English model originally trained by byeongal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_byeongal_en_5.1.1_3.0_1694590179339.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_byeongal_en_5.1.1_3.0_1694590179339.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_byeongal","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_byeongal", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_byeongal| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/byeongal/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_contents_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_contents_en.md new file mode 100644 index 00000000000000..134c377b26fe41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_contents_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_contents BertEmbeddings from Contents +author: John Snow Labs +name: bert_base_uncased_contents +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_contents` is an English model originally trained by Contents. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_contents_en_5.1.1_3.0_1694579787819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_contents_en_5.1.1_3.0_1694579787819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_contents","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_contents", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_contents| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Contents/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_copy_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_copy_en.md new file mode 100644 index 00000000000000..26732073ef077e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_copy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_copy BertEmbeddings from osanseviero +author: John Snow Labs +name: bert_base_uncased_copy +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_copy` is an English model originally trained by osanseviero. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_copy_en_5.1.1_3.0_1694598665795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_copy_en_5.1.1_3.0_1694598665795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_copy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_copy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_copy| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/osanseviero/bert-base-uncased-copy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_dstc9_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_dstc9_en.md new file mode 100644 index 00000000000000..5aa5e23b4e62f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_dstc9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_dstc9 BertEmbeddings from wilsontam +author: John Snow Labs +name: bert_base_uncased_dstc9 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_dstc9` is an English model originally trained by wilsontam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_dstc9_en_5.1.1_3.0_1694585476644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_dstc9_en_5.1.1_3.0_1694585476644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_dstc9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_dstc9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_dstc9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/wilsontam/bert-base-uncased-dstc9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto5_en.md new file mode 100644 index 00000000000000..ed245ba4391507 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_auto5 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_auto5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_auto5` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto5_en_5.1.1_3.0_1694638038292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto5_en_5.1.1_3.0_1694638038292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_auto5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_auto5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_auto5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-auto5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto6_en.md new file mode 100644 index 00000000000000..7fab8b7bb60418 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_auto6 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_auto6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_auto6` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto6_en_5.1.1_3.0_1694638484821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto6_en_5.1.1_3.0_1694638484821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_auto6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_auto6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_auto6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-auto6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto7_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto7_en.md new file mode 100644 index 00000000000000..cee0b7df827814 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_auto7 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_auto7 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_auto7` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto7_en_5.1.1_3.0_1694638942003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto7_en_5.1.1_3.0_1694638942003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_auto7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_auto7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_auto7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-auto7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto_en.md new file mode 100644 index 00000000000000..93b436cea9e9b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_auto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_auto BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_auto +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_auto` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto_en_5.1.1_3.0_1694637609034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto_en_5.1.1_3.0_1694637609034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_auto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_auto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_auto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-auto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_mlm9_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_mlm9_en.md new file mode 100644 index 00000000000000..d8c678257f171b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_mlm9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_mlm9 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_mlm9 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_mlm9` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_mlm9_en_5.1.1_3.0_1694643632914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_mlm9_en_5.1.1_3.0_1694643632914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_mlm9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_mlm9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_mlm9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-BERT-mlm9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_mlm_en.md new file mode 100644 index 00000000000000..e238a28cfb0d90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_mlm BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_mlm` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_mlm_en_5.1.1_3.0_1694640077705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_mlm_en_5.1.1_3.0_1694640077705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bertbero_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bertbero_en.md new file mode 100644 index 00000000000000..a51fd851d73000 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_bertbero_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bertbero BertEmbeddings from Transabrar +author: John Snow Labs +name: bert_base_uncased_finetuned_bertbero +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bertbero` is an English model originally trained by Transabrar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bertbero_en_5.1.1_3.0_1694572476667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bertbero_en_5.1.1_3.0_1694572476667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bertbero","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bertbero", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bertbero| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Transabrar/bert-base-uncased-finetuned-bertbero \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_eva_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_eva_accelerate_en.md new file mode 100644 index 00000000000000..ad0bc6388cdfad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_eva_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_eva_accelerate BertEmbeddings from CesarLeblanc +author: John Snow Labs +name: bert_base_uncased_finetuned_eva_accelerate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_eva_accelerate` is an English model originally trained by CesarLeblanc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_eva_accelerate_en_5.1.1_3.0_1694616995090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_eva_accelerate_en_5.1.1_3.0_1694616995090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_eva_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_eva_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_eva_accelerate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|440.6 MB| + +## References + +https://huggingface.co/CesarLeblanc/bert-base-uncased-finetuned-eva-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_eva_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_eva_en.md new file mode 100644 index 00000000000000..7f40930725eb5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_eva_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_eva BertEmbeddings from CesarLeblanc +author: John Snow Labs +name: bert_base_uncased_finetuned_eva +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_eva` is an English model originally trained by CesarLeblanc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_eva_en_5.1.1_3.0_1694616645644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_eva_en_5.1.1_3.0_1694616645644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_eva","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_eva", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_eva| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|440.7 MB| + +## References + +https://huggingface.co/CesarLeblanc/bert-base-uncased-finetuned-eva \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_gap_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_gap_en.md new file mode 100644 index 00000000000000..ba346fe26b01d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_gap_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_gap BertEmbeddings from AriyanH +author: John Snow Labs +name: bert_base_uncased_finetuned_gap +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_gap` is an English model originally trained by AriyanH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_gap_en_5.1.1_3.0_1694571134435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_gap_en_5.1.1_3.0_1694571134435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_gap","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_gap", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_gap| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/AriyanH/bert-base-uncased-finetuned-gap \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_auto3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_auto3_en.md new file mode 100644 index 00000000000000..1f5ca3901ccdd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_auto3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_auto3 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_auto3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_auto3` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_auto3_en_5.1.1_3.0_1694637243769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_auto3_en_5.1.1_3.0_1694637243769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_auto3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_auto3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_auto3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-auto3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_12_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_12_en.md new file mode 100644 index 00000000000000..30a52eb8ef2ce8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_gen_mlm_12 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_gen_mlm_12 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_gen_mlm_12` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_12_en_5.1.1_3.0_1694646419148.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_12_en_5.1.1_3.0_1694646419148.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_gen_mlm_12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_gen_mlm_12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_gen_mlm_12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-gen-MLM-12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_13_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_13_en.md new file mode 100644 index 00000000000000..025b90f82be72c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_13_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_gen_mlm_13 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_gen_mlm_13 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_gen_mlm_13` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_13_en_5.1.1_3.0_1694647435616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_13_en_5.1.1_3.0_1694647435616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_gen_mlm_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_gen_mlm_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_gen_mlm_13| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-gen-MLM-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_14_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_14_en.md new file mode 100644 index 00000000000000..1f93fa43a398d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_14_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_gen_mlm_14 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_gen_mlm_14 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_gen_mlm_14` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_14_en_5.1.1_3.0_1694647735738.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_14_en_5.1.1_3.0_1694647735738.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_gen_mlm_14","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_gen_mlm_14", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_gen_mlm_14| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-gen-MLM-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_15_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_15_en.md new file mode 100644 index 00000000000000..5d94900e2a8e98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_15_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_gen_mlm_15 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_gen_mlm_15 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_gen_mlm_15` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_15_en_5.1.1_3.0_1694648220318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_15_en_5.1.1_3.0_1694648220318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_gen_mlm_15","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_gen_mlm_15", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_gen_mlm_15| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-gen-MLM-15 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_1_en.md new file mode 100644 index 00000000000000..eec8122a404e29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_gen_mlm_1 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_gen_mlm_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_gen_mlm_1` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_1_en_5.1.1_3.0_1694645767215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_1_en_5.1.1_3.0_1694645767215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_gen_mlm_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_gen_mlm_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_gen_mlm_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-gen-MLM-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_en.md new file mode 100644 index 00000000000000..cb4afbfc9e3672 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_gen_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_gen_mlm BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_gen_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_gen_mlm` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_en_5.1.1_3.0_1694645433865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_gen_mlm_en_5.1.1_3.0_1694645433865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_gen_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_gen_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_gen_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-gen-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_n_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_n_en.md new file mode 100644 index 00000000000000..c7511dadb718eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_himani_n_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_himani_n BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_himani_n +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_himani_n` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_n_en_5.1.1_3.0_1694636833361.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_himani_n_en_5.1.1_3.0_1694636833361.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_himani_n","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_himani_n", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_himani_n| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-himani-n \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_jasheu_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_jasheu_en.md new file mode 100644 index 00000000000000..847243b5266b65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_jasheu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_imdb_jasheu BertEmbeddings from jasheu +author: John Snow Labs +name: bert_base_uncased_finetuned_imdb_jasheu +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_imdb_jasheu` is an English model originally trained by jasheu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_jasheu_en_5.1.1_3.0_1694624085789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_jasheu_en_5.1.1_3.0_1694624085789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_imdb_jasheu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_imdb_jasheu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_imdb_jasheu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jasheu/bert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_medhabi_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_medhabi_en.md new file mode 100644 index 00000000000000..9c8c1789981c46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_medhabi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_imdb_medhabi BertEmbeddings from medhabi +author: John Snow Labs +name: bert_base_uncased_finetuned_imdb_medhabi +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_imdb_medhabi` is an English model originally trained by medhabi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_medhabi_en_5.1.1_3.0_1694626974894.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_medhabi_en_5.1.1_3.0_1694626974894.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_imdb_medhabi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_imdb_medhabi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_imdb_medhabi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/medhabi/bert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_sarmila_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_sarmila_en.md new file mode 100644 index 00000000000000..0451f1a577d372 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_imdb_sarmila_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_imdb_sarmila BertEmbeddings from Sarmila +author: John Snow Labs +name: bert_base_uncased_finetuned_imdb_sarmila +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_imdb_sarmila` is an English model originally trained by Sarmila. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_sarmila_en_5.1.1_3.0_1694641541584.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_sarmila_en_5.1.1_3.0_1694641541584.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_imdb_sarmila","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_imdb_sarmila", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_imdb_sarmila| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Sarmila/bert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_kintweetse_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_kintweetse_en.md new file mode 100644 index 00000000000000..a8e01998a10993 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_kintweetse_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_kintweetse BertEmbeddings from RogerB +author: John Snow Labs +name: bert_base_uncased_finetuned_kintweetse +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_kintweetse` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_kintweetse_en_5.1.1_3.0_1694603450967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_kintweetse_en_5.1.1_3.0_1694603450967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_kintweetse","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_kintweetse", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_kintweetse| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/RogerB/bert-base-uncased-finetuned-kintweetsE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_lexglue_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_lexglue_en.md new file mode 100644 index 00000000000000..8bdea571ad9092 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_lexglue_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_lexglue BertEmbeddings from mayankb96 +author: John Snow Labs +name: bert_base_uncased_finetuned_lexglue +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_lexglue` is an English model originally trained by mayankb96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_lexglue_en_5.1.1_3.0_1694600610326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_lexglue_en_5.1.1_3.0_1694600610326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_lexglue","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_lexglue", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_lexglue| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mayankb96/bert-base-uncased-finetuned-lexglue \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_rramicus_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_rramicus_en.md new file mode 100644 index 00000000000000..55876135fdce1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuned_rramicus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_rramicus BertEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: bert_base_uncased_finetuned_rramicus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_rramicus` is an English model originally trained by repro-rights-amicus-briefs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567579152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567579152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_rramicus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_rramicus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_rramicus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/bert-base-uncased-finetuned-RRamicus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuning_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuning_en.md new file mode 100644 index 00000000000000..cedd2b66153091 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_finetuning_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuning BertEmbeddings from shimu +author: John Snow Labs +name: bert_base_uncased_finetuning +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuning` is an English model originally trained by shimu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuning_en_5.1.1_3.0_1694573972903.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuning_en_5.1.1_3.0_1694573972903.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuning","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuning", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuning| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/shimu/bert_base_uncased_finetuning \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ifedrigo_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ifedrigo_en.md new file mode 100644 index 00000000000000..918290da2ad879 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_ifedrigo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_ifedrigo BertEmbeddings from ifedrigo +author: John Snow Labs +name: bert_base_uncased_ifedrigo +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_ifedrigo` is an English model originally trained by ifedrigo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ifedrigo_en_5.1.1_3.0_1694576407119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_ifedrigo_en_5.1.1_3.0_1694576407119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_ifedrigo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_ifedrigo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_ifedrigo| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|413.2 MB| + +## References + +https://huggingface.co/ifedrigo/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_antoinev17_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_antoinev17_en.md new file mode 100644 index 00000000000000..6c6943aaa25509 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_antoinev17_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_antoinev17 BertEmbeddings from antoinev17 +author: John Snow Labs +name: bert_base_uncased_issues_128_antoinev17 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_antoinev17` is an English model originally trained by antoinev17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_antoinev17_en_5.1.1_3.0_1694634951894.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_antoinev17_en_5.1.1_3.0_1694634951894.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_antoinev17","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_antoinev17", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_antoinev17| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/antoinev17/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_betelgeux_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_betelgeux_en.md new file mode 100644 index 00000000000000..8cc07f931d8b7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_betelgeux_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_betelgeux BertEmbeddings from betelgeux +author: John Snow Labs +name: bert_base_uncased_issues_128_betelgeux +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_betelgeux` is an English model originally trained by betelgeux. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_betelgeux_en_5.1.1_3.0_1694627127578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_betelgeux_en_5.1.1_3.0_1694627127578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_betelgeux","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_betelgeux", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_betelgeux| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/betelgeux/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_chrispfield_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_chrispfield_en.md new file mode 100644 index 00000000000000..31f2b3f3ae9c6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_chrispfield_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_chrispfield BertEmbeddings from Chrispfield +author: John Snow Labs +name: bert_base_uncased_issues_128_chrispfield +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_chrispfield` is an English model originally trained by Chrispfield. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_chrispfield_en_5.1.1_3.0_1694567399706.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_chrispfield_en_5.1.1_3.0_1694567399706.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_chrispfield","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_chrispfield", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_chrispfield| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Chrispfield/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_cj_mills_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_cj_mills_en.md new file mode 100644 index 00000000000000..bed15b90576773 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_cj_mills_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_cj_mills BertEmbeddings from cj-mills +author: John Snow Labs +name: bert_base_uncased_issues_128_cj_mills +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_cj_mills` is an English model originally trained by cj-mills. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_cj_mills_en_5.1.1_3.0_1694645354456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_cj_mills_en_5.1.1_3.0_1694645354456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_cj_mills","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_cj_mills", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_cj_mills| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/cj-mills/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_coldfir3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_coldfir3_en.md new file mode 100644 index 00000000000000..948d3d2800fd03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_coldfir3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_coldfir3 BertEmbeddings from coldfir3 +author: John Snow Labs +name: bert_base_uncased_issues_128_coldfir3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_coldfir3` is an English model originally trained by coldfir3. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_coldfir3_en_5.1.1_3.0_1694594084400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_coldfir3_en_5.1.1_3.0_1694594084400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_coldfir3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_coldfir3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_coldfir3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/coldfir3/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_danielvelaj_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_danielvelaj_en.md new file mode 100644 index 00000000000000..0e424c288033d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_danielvelaj_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_danielvelaj BertEmbeddings from DanielVelaJ +author: John Snow Labs +name: bert_base_uncased_issues_128_danielvelaj +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_danielvelaj` is an English model originally trained by DanielVelaJ. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_danielvelaj_en_5.1.1_3.0_1694593430068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_danielvelaj_en_5.1.1_3.0_1694593430068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_danielvelaj","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_danielvelaj", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_danielvelaj| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/DanielVelaJ/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_frahman_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_frahman_en.md new file mode 100644 index 00000000000000..ffb41eba22f342 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_frahman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_frahman BertEmbeddings from frahman +author: John Snow Labs +name: bert_base_uncased_issues_128_frahman +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_frahman` is an English model originally trained by frahman. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_frahman_en_5.1.1_3.0_1694627458573.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_frahman_en_5.1.1_3.0_1694627458573.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_frahman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_frahman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_frahman| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/frahman/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_haesun_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_haesun_en.md new file mode 100644 index 00000000000000..5900144020ac72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_haesun_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_haesun BertEmbeddings from haesun +author: John Snow Labs +name: bert_base_uncased_issues_128_haesun +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_haesun` is an English model originally trained by haesun. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_haesun_en_5.1.1_3.0_1694579131163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_haesun_en_5.1.1_3.0_1694579131163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_haesun","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_haesun", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_haesun| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/haesun/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_hanwoon_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_hanwoon_en.md new file mode 100644 index 00000000000000..45709508e28952 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_hanwoon_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_hanwoon BertEmbeddings from Hanwoon +author: John Snow Labs +name: bert_base_uncased_issues_128_hanwoon +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_hanwoon` is an English model originally trained by Hanwoon. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hanwoon_en_5.1.1_3.0_1694619277361.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hanwoon_en_5.1.1_3.0_1694619277361.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_hanwoon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_hanwoon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_hanwoon| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Hanwoon/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_hudee_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_hudee_en.md new file mode 100644 index 00000000000000..f87d854ac2f1fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_hudee_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_hudee BertEmbeddings from Hudee +author: John Snow Labs +name: bert_base_uncased_issues_128_hudee +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_hudee` is an English model originally trained by Hudee. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hudee_en_5.1.1_3.0_1694637838114.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_hudee_en_5.1.1_3.0_1694637838114.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_hudee","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_hudee", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_hudee| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Hudee/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_issues_128_kiri1701_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_issues_128_kiri1701_en.md new file mode 100644 index 00000000000000..07ee9e81042a40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_issues_128_kiri1701_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_issues_128_kiri1701 BertEmbeddings from kiri1701 +author: John Snow Labs +name: bert_base_uncased_issues_128_issues_128_kiri1701 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_issues_128_kiri1701` is an English model originally trained by kiri1701. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_kiri1701_en_5.1.1_3.0_1694564477310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_issues_128_kiri1701_en_5.1.1_3.0_1694564477310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_issues_128_kiri1701","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_issues_128_kiri1701", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_issues_128_kiri1701| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/kiri1701/bert-base-uncased-issues-128-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_jmassot_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_jmassot_en.md new file mode 100644 index 00000000000000..8009f6b354f5cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_jmassot_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_jmassot BertEmbeddings from jmassot +author: John Snow Labs +name: bert_base_uncased_issues_128_jmassot +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_jmassot` is an English model originally trained by jmassot. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jmassot_en_5.1.1_3.0_1694587856633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_jmassot_en_5.1.1_3.0_1694587856633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_jmassot","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_jmassot", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_jmassot| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jmassot/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_juandeun_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_juandeun_en.md new file mode 100644 index 00000000000000..f6843fa12ad757 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_juandeun_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_juandeun BertEmbeddings from juandeun +author: John Snow Labs +name: bert_base_uncased_issues_128_juandeun +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_juandeun` is an English model originally trained by juandeun. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_juandeun_en_5.1.1_3.0_1694600405699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_juandeun_en_5.1.1_3.0_1694600405699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_juandeun","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_juandeun", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_juandeun| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/juandeun/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_kiki2013_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_kiki2013_en.md new file mode 100644 index 00000000000000..7858ab760fd57f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_kiki2013_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_kiki2013 BertEmbeddings from kiki2013 +author: John Snow Labs +name: bert_base_uncased_issues_128_kiki2013 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_kiki2013` is an English model originally trained by kiki2013. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_kiki2013_en_5.1.1_3.0_1694609694156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_kiki2013_en_5.1.1_3.0_1694609694156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_kiki2013","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_kiki2013", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_kiki2013| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/kiki2013/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_lijingxin_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_lijingxin_en.md new file mode 100644 index 00000000000000..0a05f166fad5ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_lijingxin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_lijingxin BertEmbeddings from lijingxin +author: John Snow Labs +name: bert_base_uncased_issues_128_lijingxin +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_lijingxin` is an English model originally trained by lijingxin. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_lijingxin_en_5.1.1_3.0_1694611963447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_lijingxin_en_5.1.1_3.0_1694611963447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_issues_128_lijingxin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_lijingxin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_lijingxin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/lijingxin/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_martinwunderlich_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_martinwunderlich_en.md new file mode 100644 index 00000000000000..d2b2cd560a2ea8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_martinwunderlich_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_martinwunderlich BertEmbeddings from martinwunderlich +author: John Snow Labs +name: bert_base_uncased_issues_128_martinwunderlich +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_martinwunderlich` is an English model originally trained by martinwunderlich. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_martinwunderlich_en_5.1.1_3.0_1694633945364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_martinwunderlich_en_5.1.1_3.0_1694633945364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_martinwunderlich","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_martinwunderlich", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_martinwunderlich| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/martinwunderlich/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_munsu_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_munsu_en.md new file mode 100644 index 00000000000000..e1ad51b8371472 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_munsu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_munsu BertEmbeddings from MunSu +author: John Snow Labs +name: bert_base_uncased_issues_128_munsu +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_munsu` is an English model originally trained by MunSu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_munsu_en_5.1.1_3.0_1694586892919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_munsu_en_5.1.1_3.0_1694586892919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_munsu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_munsu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_munsu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/MunSu/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_olpa_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_olpa_en.md new file mode 100644 index 00000000000000..33466e6cf42ab6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_olpa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_olpa BertEmbeddings from olpa +author: John Snow Labs +name: bert_base_uncased_issues_128_olpa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_olpa` is an English model originally trained by olpa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_olpa_en_5.1.1_3.0_1694631357838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_olpa_en_5.1.1_3.0_1694631357838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_olpa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_olpa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_olpa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/olpa/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_roscoyoon_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_roscoyoon_en.md new file mode 100644 index 00000000000000..81e2a4883fdef9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_roscoyoon_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_roscoyoon BertEmbeddings from roscoyoon +author: John Snow Labs +name: bert_base_uncased_issues_128_roscoyoon +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_roscoyoon` is an English model originally trained by roscoyoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_roscoyoon_en_5.1.1_3.0_1694575118085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_roscoyoon_en_5.1.1_3.0_1694575118085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_roscoyoon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_roscoyoon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_roscoyoon| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/roscoyoon/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_shenghao1993_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_shenghao1993_en.md new file mode 100644 index 00000000000000..ca206b9f9f651b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_shenghao1993_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_shenghao1993 BertEmbeddings from Shenghao1993 +author: John Snow Labs +name: bert_base_uncased_issues_128_shenghao1993 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_shenghao1993` is an English model originally trained by Shenghao1993. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_shenghao1993_en_5.1.1_3.0_1694567929037.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_shenghao1993_en_5.1.1_3.0_1694567929037.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_shenghao1993","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_shenghao1993", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_shenghao1993| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Shenghao1993/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_synpjh_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_synpjh_en.md new file mode 100644 index 00000000000000..3fa0e07aac9da5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_synpjh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_synpjh BertEmbeddings from synpjh +author: John Snow Labs +name: bert_base_uncased_issues_128_synpjh +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_synpjh` is an English model originally trained by synpjh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_synpjh_en_5.1.1_3.0_1694585902258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_synpjh_en_5.1.1_3.0_1694585902258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_synpjh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_synpjh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_synpjh| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/synpjh/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_transformersbook_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_transformersbook_en.md new file mode 100644 index 00000000000000..8e4a59eeea3d16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_transformersbook_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_transformersbook BertEmbeddings from transformersbook +author: John Snow Labs +name: bert_base_uncased_issues_128_transformersbook +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_transformersbook` is an English model originally trained by transformersbook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_transformersbook_en_5.1.1_3.0_1694578903746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_transformersbook_en_5.1.1_3.0_1694578903746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_transformersbook","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_transformersbook", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_transformersbook| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/transformersbook/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_twidfeel_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_twidfeel_en.md new file mode 100644 index 00000000000000..82bd79398682ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_twidfeel_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_twidfeel BertEmbeddings from twidfeel +author: John Snow Labs +name: bert_base_uncased_issues_128_twidfeel +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_twidfeel` is an English model originally trained by twidfeel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_twidfeel_en_5.1.1_3.0_1694586346305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_twidfeel_en_5.1.1_3.0_1694586346305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_twidfeel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_twidfeel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_twidfeel| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/twidfeel/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_xxr_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_xxr_en.md new file mode 100644 index 00000000000000..1ba71ee2d8e2c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_issues_128_xxr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_xxr BertEmbeddings from xxr +author: John Snow Labs +name: bert_base_uncased_issues_128_xxr +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_xxr` is an English model originally trained by xxr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_xxr_en_5.1.1_3.0_1694586835551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_xxr_en_5.1.1_3.0_1694586835551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_xxr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_xxr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_xxr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/xxr/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_lm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_lm_en.md new file mode 100644 index 00000000000000..5a7ec6224cf0d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_lm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_lm BertEmbeddings from iewaij +author: John Snow Labs +name: bert_base_uncased_lm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_lm` is an English model originally trained by iewaij. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_lm_en_5.1.1_3.0_1694578212936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_lm_en_5.1.1_3.0_1694578212936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_lm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_lm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_lm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/iewaij/bert-base-uncased-lm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_mlm_en.md new file mode 100644 index 00000000000000..63f27883f83cc0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_mlm BertEmbeddings from wypoon +author: John Snow Labs +name: bert_base_uncased_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mlm` is an English model originally trained by wypoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mlm_en_5.1.1_3.0_1694614779984.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mlm_en_5.1.1_3.0_1694614779984.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/wypoon/bert-base-uncased-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_model_attribution_challenge_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_model_attribution_challenge_en.md new file mode 100644 index 00000000000000..aea6646fb8eb77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_model_attribution_challenge_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_model_attribution_challenge BertEmbeddings from model-attribution-challenge +author: John Snow Labs +name: bert_base_uncased_model_attribution_challenge +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_model_attribution_challenge` is an English model originally trained by model-attribution-challenge. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_model_attribution_challenge_en_5.1.1_3.0_1694628303662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_model_attribution_challenge_en_5.1.1_3.0_1694628303662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_model_attribution_challenge","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_model_attribution_challenge", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_model_attribution_challenge| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/model-attribution-challenge/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_multi_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_multi_128_en.md new file mode 100644 index 00000000000000..21d3f1a31b8aef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_multi_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_multi_128 BertEmbeddings from xxr +author: John Snow Labs +name: bert_base_uncased_multi_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_multi_128` is an English model originally trained by xxr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_multi_128_en_5.1.1_3.0_1694625544033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_multi_128_en_5.1.1_3.0_1694625544033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_multi_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_multi_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_multi_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/xxr/bert-base-uncased-multi-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_new_data_bert1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_new_data_bert1_en.md new file mode 100644 index 00000000000000..e9875facf050b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_new_data_bert1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_new_data_bert1 BertEmbeddings from Billwzl +author: John Snow Labs +name: bert_base_uncased_new_data_bert1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_new_data_bert1` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_new_data_bert1_en_5.1.1_3.0_1694574719260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_new_data_bert1_en_5.1.1_3.0_1694574719260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_new_data_bert1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_new_data_bert1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_new_data_bert1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Billwzl/bert-base-uncased-New_data_bert1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en.md new file mode 100644 index 00000000000000..ab77562c3516e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine BertEmbeddings from spacemanidol +author: John Snow Labs +name: bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine` is an English model originally trained by spacemanidol. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en_5.1.1_3.0_1694578896728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine_en_5.1.1_3.0_1694578896728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_noisy_orcas_1.0positive_0.5_negative_margin1.0_cosine| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/spacemanidol/bert-base-uncased-noisy-orcas-1.0positive-0.5-negative-margin1.0-cosine \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_2_en.md new file mode 100644 index 00000000000000..080d928ea56603 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_reviews_2 BertEmbeddings from insaf +author: John Snow Labs +name: bert_base_uncased_reviews_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_reviews_2` is an English model originally trained by insaf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_2_en_5.1.1_3.0_1694580821822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_2_en_5.1.1_3.0_1694580821822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_reviews_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_reviews_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_reviews_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/insaf/bert-base-uncased-reviews-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_3_en.md new file mode 100644 index 00000000000000..bdd2578e56df05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_reviews_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_reviews_3 BertEmbeddings from insaf +author: John Snow Labs +name: bert_base_uncased_reviews_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_reviews_3` is an English model originally trained by insaf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_3_en_5.1.1_3.0_1694581488679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_3_en_5.1.1_3.0_1694581488679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_reviews_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_reviews_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_reviews_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/insaf/bert-base-uncased-reviews-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..388ba75ecb9555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_rotten_tomatoes BertEmbeddings from textattack +author: John Snow Labs +name: bert_base_uncased_rotten_tomatoes +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_rotten_tomatoes` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rotten_tomatoes_en_5.1.1_3.0_1694577597481.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_rotten_tomatoes_en_5.1.1_3.0_1694577597481.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/textattack/bert-base-uncased-rotten_tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_sparse_80_1x4_block_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_sparse_80_1x4_block_pruneofa_en.md new file mode 100644 index 00000000000000..498a7d19a4b591 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_sparse_80_1x4_block_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_sparse_80_1x4_block_pruneofa BertEmbeddings from Intel +author: John Snow Labs +name: bert_base_uncased_sparse_80_1x4_block_pruneofa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_sparse_80_1x4_block_pruneofa` is an English model originally trained by Intel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_80_1x4_block_pruneofa_en_5.1.1_3.0_1694621945507.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_sparse_80_1x4_block_pruneofa_en_5.1.1_3.0_1694621945507.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_sparse_80_1x4_block_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_sparse_80_1x4_block_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_sparse_80_1x4_block_pruneofa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|195.0 MB| + +## References + +https://huggingface.co/Intel/bert-base-uncased-sparse-80-1x4-block-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_swahili_sw.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_swahili_sw.md new file mode 100644 index 00000000000000..38c58fe8d9941e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_swahili_sw.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swahili (macrolanguage) bert_base_uncased_swahili BertEmbeddings from flax-community +author: John Snow Labs +name: bert_base_uncased_swahili +date: 2023-09-13 +tags: [bert, sw, open_source, fill_mask, onnx] +task: Embeddings +language: sw +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_swahili` is a Swahili (macrolanguage) model originally trained by flax-community. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_swahili_sw_5.1.1_3.0_1694642650442.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_swahili_sw_5.1.1_3.0_1694642650442.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_swahili","sw") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_swahili", "sw") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_swahili| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sw| +|Size:|408.0 MB| + +## References + +https://huggingface.co/flax-community/bert-base-uncased-swahili \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_test_en.md new file mode 100644 index 00000000000000..a8382838dcce78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_uncased_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_test BertEmbeddings from Contents +author: John Snow Labs +name: bert_base_uncased_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_test` is an English model originally trained by Contents. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_test_en_5.1.1_3.0_1694584225340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_test_en_5.1.1_3.0_1694584225340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Contents/bert-base-uncased-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_en.md new file mode 100644 index 00000000000000..4397c42a1ae458 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_vn BertEmbeddings from NlpHUST +author: John Snow Labs +name: bert_base_vn +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_vn` is an English model originally trained by NlpHUST. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_vn_en_5.1.1_3.0_1694624251745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_vn_en_5.1.1_3.0_1694624251745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_vn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_vn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_vn| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|498.8 MB| + +## References + +https://huggingface.co/NlpHUST/bert-base-vn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_finetuned_portuguese_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_finetuned_portuguese_en.md new file mode 100644 index 00000000000000..f672f2eb5369a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_vn_finetuned_portuguese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_vn_finetuned_portuguese BertEmbeddings from dotansang +author: John Snow Labs +name: bert_base_vn_finetuned_portuguese +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_vn_finetuned_portuguese` is an English model originally trained by dotansang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_vn_finetuned_portuguese_en_5.1.1_3.0_1694582275769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_vn_finetuned_portuguese_en_5.1.1_3.0_1694582275769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_vn_finetuned_portuguese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_vn_finetuned_portuguese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_vn_finetuned_portuguese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|498.8 MB| + +## References + +https://huggingface.co/dotansang/bert-base-vn-finetuned-pt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_base_yc_recipe_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_base_yc_recipe_30_en.md new file mode 100644 index 00000000000000..214621443f1dfb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_base_yc_recipe_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_yc_recipe_30 BertEmbeddings from CennetOguz +author: John Snow Labs +name: bert_base_yc_recipe_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_yc_recipe_30` is an English model originally trained by CennetOguz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_yc_recipe_30_en_5.1.1_3.0_1694568535972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_yc_recipe_30_en_5.1.1_3.0_1694568535972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_yc_recipe_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_yc_recipe_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_yc_recipe_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/CennetOguz/bert_base_yc_recipe_30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_based_answer_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_based_answer_model_en.md new file mode 100644 index 00000000000000..46cfd40d698bc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_based_answer_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_based_answer_model BertEmbeddings from Kunjesh07 +author: John Snow Labs +name: bert_based_answer_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_based_answer_model` is an English model originally trained by Kunjesh07. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_based_answer_model_en_5.1.1_3.0_1694616027282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_based_answer_model_en_5.1.1_3.0_1694616027282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_based_answer_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_based_answer_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_based_answer_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Kunjesh07/Bert-Based-Answer-Model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_based_restaurant_review_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_based_restaurant_review_en.md new file mode 100644 index 00000000000000..bcda9e00f0aade --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_based_restaurant_review_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_based_restaurant_review BertEmbeddings from River-jh +author: John Snow Labs +name: bert_based_restaurant_review +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_based_restaurant_review` is an English model originally trained by River-jh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_based_restaurant_review_en_5.1.1_3.0_1694572475763.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_based_restaurant_review_en_5.1.1_3.0_1694572475763.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_based_restaurant_review","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_based_restaurant_review", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_based_restaurant_review| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/River-jh/bert-based-restaurant-review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_cluster_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_cluster_en.md new file mode 100644 index 00000000000000..29a284c498eb04 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_cluster_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_cluster BertEmbeddings from mipatov +author: John Snow Labs +name: bert_cluster +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cluster` is an English model originally trained by mipatov. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cluster_en_5.1.1_3.0_1694647045460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cluster_en_5.1.1_3.0_1694647045460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_cluster","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_cluster", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cluster| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.9 MB| + +## References + +https://huggingface.co/mipatov/bert_cluster \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_cn_wudi7758521521_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_cn_wudi7758521521_en.md new file mode 100644 index 00000000000000..5761861a3274d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_cn_wudi7758521521_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_cn_wudi7758521521 BertEmbeddings from wudi7758521521 +author: John Snow Labs +name: bert_cn_wudi7758521521 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_cn_wudi7758521521` is an English model originally trained by wudi7758521521. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_cn_wudi7758521521_en_5.1.1_3.0_1694585808653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_cn_wudi7758521521_en_5.1.1_3.0_1694585808653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_cn_wudi7758521521","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_cn_wudi7758521521", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_cn_wudi7758521521| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wudi7758521521/bert_cn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_en.md new file mode 100644 index 00000000000000..1d1bd83784da47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_2 BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_2` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_2_en_5.1.1_3.0_1694586672241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_2_en_5.1.1_3.0_1694586672241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_concat_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.4 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_finetune_simcse_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_finetune_simcse_truncate_en.md new file mode 100644 index 00000000000000..7dfa950dce2240 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_2_finetune_simcse_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_2_finetune_simcse_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_2_finetune_simcse_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_2_finetune_simcse_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_2_finetune_simcse_truncate_en_5.1.1_3.0_1694588304153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_2_finetune_simcse_truncate_en_5.1.1_3.0_1694588304153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_concat_2_finetune_simcse_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_2_finetune_simcse_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_2_finetune_simcse_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|399.9 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-2-finetune-simcse-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_en.md new file mode 100644 index 00000000000000..84ec7e94261b5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_3 BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_3` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_3_en_5.1.1_3.0_1694587599955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_3_en_5.1.1_3.0_1694587599955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_concat_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_finetune_simcse_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_finetune_simcse_truncate_en.md new file mode 100644 index 00000000000000..40ade906ae5fb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_concat_3_finetune_simcse_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_concat_3_finetune_simcse_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_concat_3_finetune_simcse_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_concat_3_finetune_simcse_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_concat_3_finetune_simcse_truncate_en_5.1.1_3.0_1694588145224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_concat_3_finetune_simcse_truncate_en_5.1.1_3.0_1694588145224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_concat_3_finetune_simcse_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_concat_3_finetune_simcse_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_concat_3_finetune_simcse_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|401.0 MB| + +## References + +https://huggingface.co/NasimB/bert-concat-3-finetune-simcse-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_csl_gold8k_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_csl_gold8k_en.md new file mode 100644 index 00000000000000..453130fb2c2cb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_csl_gold8k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_csl_gold8k BertEmbeddings from subbareddyiiit +author: John Snow Labs +name: bert_csl_gold8k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_csl_gold8k` is an English model originally trained by subbareddyiiit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_csl_gold8k_en_5.1.1_3.0_1694575569274.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_csl_gold8k_en_5.1.1_3.0_1694575569274.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_csl_gold8k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_csl_gold8k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_csl_gold8k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/subbareddyiiit/bert_csl_gold8k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_dk_laptop_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_dk_laptop_en.md new file mode 100644 index 00000000000000..184b207184d8e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_dk_laptop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_dk_laptop BertEmbeddings from activebus +author: John Snow Labs +name: bert_dk_laptop +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_dk_laptop` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_dk_laptop_en_5.1.1_3.0_1694575473487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_dk_laptop_en_5.1.1_3.0_1694575473487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_dk_laptop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_dk_laptop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_dk_laptop| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.4 MB| + +## References + +https://huggingface.co/activebus/BERT-DK_laptop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_dk_rest_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_dk_rest_en.md new file mode 100644 index 00000000000000..478cf5f6fe2faa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_dk_rest_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_dk_rest BertEmbeddings from activebus +author: John Snow Labs +name: bert_dk_rest +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_dk_rest` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_dk_rest_en_5.1.1_3.0_1694575729183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_dk_rest_en_5.1.1_3.0_1694575729183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_dk_rest","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_dk_rest", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_dk_rest| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.3 MB| + +## References + +https://huggingface.co/activebus/BERT-DK_rest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_double_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_double_en.md new file mode 100644 index 00000000000000..4346874aebf859 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_double_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_double BertEmbeddings from casehold +author: John Snow Labs +name: bert_double +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_double` is an English model originally trained by casehold. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_double_en_5.1.1_3.0_1694597248105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_double_en_5.1.1_3.0_1694597248105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_double","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_double", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_double| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/casehold/bert-double \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_dp_4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_dp_4_en.md new file mode 100644 index 00000000000000..bbe1ef3293485c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_dp_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_dp_4 BertEmbeddings from NasimB +author: John Snow Labs +name: bert_dp_4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_dp_4` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_dp_4_en_5.1.1_3.0_1694588456919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_dp_4_en_5.1.1_3.0_1694588456919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_dp_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_dp_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_dp_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/NasimB/bert-dp-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_02_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_02_en.md new file mode 100644 index 00000000000000..6b2382812cd515 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_02_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_embding_finetuned_spmlm_02 BertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: bert_embding_finetuned_spmlm_02 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_embding_finetuned_spmlm_02` is an English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embding_finetuned_spmlm_02_en_5.1.1_3.0_1694585695765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embding_finetuned_spmlm_02_en_5.1.1_3.0_1694585695765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_embding_finetuned_spmlm_02","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_embding_finetuned_spmlm_02", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embding_finetuned_spmlm_02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/ashwathjadhav23/Bert_Embding_Finetuned_SpMLM_02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_en.md new file mode 100644 index 00000000000000..4a7dda31e29a2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_embding_finetuned_spmlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_embding_finetuned_spmlm BertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: bert_embding_finetuned_spmlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_embding_finetuned_spmlm` is an English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_embding_finetuned_spmlm_en_5.1.1_3.0_1694585297333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_embding_finetuned_spmlm_en_5.1.1_3.0_1694585297333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_embding_finetuned_spmlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_embding_finetuned_spmlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_embding_finetuned_spmlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/ashwathjadhav23/Bert_Embding_Finetuned_SpMLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetune_simcse_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetune_simcse_truncate_en.md new file mode 100644 index 00000000000000..f911da42bcbdf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetune_simcse_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetune_simcse_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_finetune_simcse_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetune_simcse_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetune_simcse_truncate_en_5.1.1_3.0_1694587925442.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetune_simcse_truncate_en_5.1.1_3.0_1694587925442.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetune_simcse_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetune_simcse_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetune_simcse_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/NasimB/bert-finetune-simcse-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test1_en.md new file mode 100644 index 00000000000000..22ee728365f6b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test1 BertEmbeddings from bill +author: John Snow Labs +name: bert_finetuning_test1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test1` is an English model originally trained by bill. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test1_en_5.1.1_3.0_1694587604269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test1_en_5.1.1_3.0_1694587604269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/bill/bert_finetuning_test1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_0925_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_0925_en.md new file mode 100644 index 00000000000000..13bdf930b47782 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_0925_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_0925 BertEmbeddings from wbmitcast +author: John Snow Labs +name: bert_finetuning_test_0925 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_0925` is an English model originally trained by wbmitcast. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_0925_en_5.1.1_3.0_1694583057346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_0925_en_5.1.1_3.0_1694583057346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test_0925","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_0925", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_0925| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/bert_finetuning_test_0925 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_mine_result_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_mine_result_en.md new file mode 100644 index 00000000000000..5a4c08c126c555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_mine_result_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_mine_result BertEmbeddings from Martinlabla +author: John Snow Labs +name: bert_finetuning_test_mine_result +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_mine_result` is an English model originally trained by Martinlabla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_mine_result_en_5.1.1_3.0_1694567497197.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_mine_result_en_5.1.1_3.0_1694567497197.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test_mine_result","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_mine_result", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_mine_result| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Martinlabla/bert_finetuning_test_mine_result \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_xiejiafang_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_xiejiafang_en.md new file mode 100644 index 00000000000000..1def6ee443a447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_xiejiafang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_xiejiafang BertEmbeddings from xiejiafang +author: John Snow Labs +name: bert_finetuning_test_xiejiafang +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_xiejiafang` is an English model originally trained by xiejiafang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_xiejiafang_en_5.1.1_3.0_1694586563453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_xiejiafang_en_5.1.1_3.0_1694586563453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test_xiejiafang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_xiejiafang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_xiejiafang| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/xiejiafang/bert_finetuning_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_zqf03118_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_zqf03118_en.md new file mode 100644 index 00000000000000..7b641eeeba3434 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_finetuning_test_zqf03118_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_finetuning_test_zqf03118 BertEmbeddings from zqf03118 +author: John Snow Labs +name: bert_finetuning_test_zqf03118 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_finetuning_test_zqf03118` is an English model originally trained by zqf03118. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_zqf03118_en_5.1.1_3.0_1694599186944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_finetuning_test_zqf03118_en_5.1.1_3.0_1694599186944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_finetuning_test_zqf03118","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_finetuning_test_zqf03118", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_finetuning_test_zqf03118| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/zqf03118/bert_finetuning_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_fintuning_test1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_fintuning_test1_en.md new file mode 100644 index 00000000000000..421adfc07a9f4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_fintuning_test1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_fintuning_test1 BertEmbeddings from ZhaoyiGUAN +author: John Snow Labs +name: bert_fintuning_test1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_fintuning_test1` is an English model originally trained by ZhaoyiGUAN. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_fintuning_test1_en_5.1.1_3.0_1694574944830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_fintuning_test1_en_5.1.1_3.0_1694574944830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_fintuning_test1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_fintuning_test1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_fintuning_test1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/ZhaoyiGUAN/Bert_Fintuning_Test1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_big_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_big_en.md new file mode 100644 index 00000000000000..9076731484a34a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_big_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hinglish_big BertEmbeddings from aditeyabaral +author: John Snow Labs +name: bert_hinglish_big +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_hinglish_big` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hinglish_big_en_5.1.1_3.0_1694576522327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hinglish_big_en_5.1.1_3.0_1694576522327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_hinglish_big","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hinglish_big", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hinglish_big| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/aditeyabaral/bert-hinglish-big \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_small_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_small_en.md new file mode 100644 index 00000000000000..b6ca09020bdff6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_hinglish_small_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hinglish_small BertEmbeddings from aditeyabaral +author: John Snow Labs +name: bert_hinglish_small +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_hinglish_small` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hinglish_small_en_5.1.1_3.0_1694576634952.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hinglish_small_en_5.1.1_3.0_1694576634952.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_hinglish_small","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hinglish_small", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hinglish_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/aditeyabaral/bert-hinglish-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_hs_idpt_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_hs_idpt_en.md new file mode 100644 index 00000000000000..a0c3a1346cf118 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_hs_idpt_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_hs_idpt BertEmbeddings from SmartPy +author: John Snow Labs +name: bert_hs_idpt +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_hs_idpt` is an English model originally trained by SmartPy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_hs_idpt_en_5.1.1_3.0_1694607979355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_hs_idpt_en_5.1.1_3.0_1694607979355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_hs_idpt","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_hs_idpt", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_hs_idpt| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/SmartPy/bert-hs-idpt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_java_bfp_single_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_java_bfp_single_en.md new file mode 100644 index 00000000000000..b4bb0eeb54081b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_java_bfp_single_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_java_bfp_single BertEmbeddings from up201806461 +author: John Snow Labs +name: bert_java_bfp_single +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_java_bfp_single` is an English model originally trained by up201806461. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_java_bfp_single_en_5.1.1_3.0_1694578962374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_java_bfp_single_en_5.1.1_3.0_1694578962374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_java_bfp_single","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_java_bfp_single", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_java_bfp_single| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/up201806461/bert-java-bfp_single \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_kor_base_pz_language_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_kor_base_pz_language_test_en.md new file mode 100644 index 00000000000000..ec736e433c0d7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_kor_base_pz_language_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_kor_base_pz_language_test BertEmbeddings from Hanwoon +author: John Snow Labs +name: bert_kor_base_pz_language_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_kor_base_pz_language_test` is an English model originally trained by Hanwoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_kor_base_pz_language_test_en_5.1.1_3.0_1694620643379.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_kor_base_pz_language_test_en_5.1.1_3.0_1694620643379.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_kor_base_pz_language_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_kor_base_pz_language_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_kor_base_pz_language_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.1 MB| + +## References + +https://huggingface.co/Hanwoon/bert-kor-base-pz-language-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h240_a12_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h240_a12_en.md new file mode 100644 index 00000000000000..17ce53d63b13f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h240_a12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_l12_h240_a12 BertEmbeddings from eli4s +author: John Snow Labs +name: bert_l12_h240_a12 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_l12_h240_a12` is an English model originally trained by eli4s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_l12_h240_a12_en_5.1.1_3.0_1694629467102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_l12_h240_a12_en_5.1.1_3.0_1694629467102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_l12_h240_a12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_l12_h240_a12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_l12_h240_a12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|105.1 MB| + +## References + +https://huggingface.co/eli4s/Bert-L12-h240-A12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h256_a4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h256_a4_en.md new file mode 100644 index 00000000000000..63e2963fe9b7ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h256_a4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_l12_h256_a4 BertEmbeddings from eli4s +author: John Snow Labs +name: bert_l12_h256_a4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_l12_h256_a4` is an English model originally trained by eli4s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_l12_h256_a4_en_5.1.1_3.0_1694629695737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_l12_h256_a4_en_5.1.1_3.0_1694629695737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_l12_h256_a4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_l12_h256_a4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_l12_h256_a4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|112.8 MB| + +## References + +https://huggingface.co/eli4s/Bert-L12-h256-A4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h384_a6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h384_a6_en.md new file mode 100644 index 00000000000000..2bda8fd14d7977 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_l12_h384_a6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_l12_h384_a6 BertEmbeddings from eli4s +author: John Snow Labs +name: bert_l12_h384_a6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_l12_h384_a6` is an English model originally trained by eli4s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_l12_h384_a6_en_5.1.1_3.0_1694629972509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_l12_h384_a6_en_5.1.1_3.0_1694629972509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_l12_h384_a6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_l12_h384_a6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_l12_h384_a6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|177.8 MB| + +## References + +https://huggingface.co/eli4s/Bert-L12-h384-A6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_labor_space_token_512_batch_8_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_labor_space_token_512_batch_8_en.md new file mode 100644 index 00000000000000..cef4e1056540e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_labor_space_token_512_batch_8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_labor_space_token_512_batch_8 BertEmbeddings from seongwoon +author: John Snow Labs +name: bert_labor_space_token_512_batch_8 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_labor_space_token_512_batch_8` is an English model originally trained by seongwoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_labor_space_token_512_batch_8_en_5.1.1_3.0_1694591397056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_labor_space_token_512_batch_8_en_5.1.1_3.0_1694591397056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_labor_space_token_512_batch_8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_labor_space_token_512_batch_8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_labor_space_token_512_batch_8| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/seongwoon/Bert_labor_space_token_512_batch_8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv02_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv02_ar.md new file mode 100644 index 00000000000000..d9d37240f60859 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv02_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_large_arabertv02 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_large_arabertv02 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_arabertv02` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_arabertv02_ar_5.1.1_3.0_1694584023166.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_arabertv02_ar_5.1.1_3.0_1694584023166.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_arabertv02","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_arabertv02", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_arabertv02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|1.4 GB| + +## References + +https://huggingface.co/aubmindlab/bert-large-arabertv02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv02_twitter_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv02_twitter_ar.md new file mode 100644 index 00000000000000..3b4cdb91d7e071 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv02_twitter_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_large_arabertv02_twitter BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_large_arabertv02_twitter +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_arabertv02_twitter` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_arabertv02_twitter_ar_5.1.1_3.0_1694583674089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_arabertv02_twitter_ar_5.1.1_3.0_1694583674089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_arabertv02_twitter","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_arabertv02_twitter", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_arabertv02_twitter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|1.4 GB| + +## References + +https://huggingface.co/aubmindlab/bert-large-arabertv02-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv2_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv2_ar.md new file mode 100644 index 00000000000000..f11181a70680fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabertv2_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_large_arabertv2 BertEmbeddings from aubmindlab +author: John Snow Labs +name: bert_large_arabertv2 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_arabertv2` is an Arabic model originally trained by aubmindlab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_arabertv2_ar_5.1.1_3.0_1694584366192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_arabertv2_ar_5.1.1_3.0_1694584366192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_arabertv2","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_arabertv2", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_arabertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|1.4 GB| + +## References + +https://huggingface.co/aubmindlab/bert-large-arabertv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabic_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabic_ar.md new file mode 100644 index 00000000000000..48d1326005cf80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_arabic_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_large_arabic BertEmbeddings from asafaya +author: John Snow Labs +name: bert_large_arabic +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_arabic` is an Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_arabic_ar_5.1.1_3.0_1694582290621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_arabic_ar_5.1.1_3.0_1694582290621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_arabic","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_arabic", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|1.3 GB| + +## References + +https://huggingface.co/asafaya/bert-large-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_cased_da_20_en.md new file mode 100644 index 00000000000000..cf89ae83f44e9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_cased_da_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_cased_da_20_en_5.1.1_3.0_1694564770508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_cased_da_20_en_5.1.1_3.0_1694564770508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_hkdse_english_paper4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_hkdse_english_paper4_en.md new file mode 100644 index 00000000000000..1906ba067e6f5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_hkdse_english_paper4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_hkdse_english_paper4 BertEmbeddings from Wootang01 +author: John Snow Labs +name: bert_large_cased_finetuned_hkdse_english_paper4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_hkdse_english_paper4` is an English model originally trained by Wootang01.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_hkdse_english_paper4_en_5.1.1_3.0_1694647633988.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_hkdse_english_paper4_en_5.1.1_3.0_1694647633988.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_hkdse_english_paper4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_hkdse_english_paper4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_hkdse_english_paper4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/Wootang01/bert-large-cased-finetuned-hkdse-english-paper4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low100_0_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low100_0_cased_da_20_en.md new file mode 100644 index 00000000000000..1d95bcf3764694 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low100_0_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_low100_0_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_low100_0_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_low100_0_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low100_0_cased_da_20_en_5.1.1_3.0_1694565992407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low100_0_cased_da_20_en_5.1.1_3.0_1694565992407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_low100_0_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_low100_0_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_low100_0_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-low100-0-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low10_0_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low10_0_cased_da_20_en.md new file mode 100644 index 00000000000000..79f3eb938bfb36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low10_0_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_low10_0_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_low10_0_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_low10_0_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low10_0_cased_da_20_en_5.1.1_3.0_1694565663480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low10_0_cased_da_20_en_5.1.1_3.0_1694565663480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_low10_0_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_low10_0_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_low10_0_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-low10-0-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_1_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_1_cased_da_20_en.md new file mode 100644 index 00000000000000..742afa69b26391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_1_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_low20_1_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_low20_1_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_low20_1_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_1_cased_da_20_en_5.1.1_3.0_1694565382077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_1_cased_da_20_en_5.1.1_3.0_1694565382077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_low20_1_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_low20_1_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_low20_1_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-low20-1-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_cased_da_20_en.md new file mode 100644 index 00000000000000..5628db32798146 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_low20_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_low20_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_low20_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_low20_cased_da_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_cased_da_20_en_5.1.1_3.0_1694565075875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_low20_cased_da_20_en_5.1.1_3.0_1694565075875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_low20_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_low20_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_low20_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-low20-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_20_en.md new file mode 100644 index 00000000000000..bf2d0cdadfa73c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_0_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_0_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_0_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_0_cased_da_20_en_5.1.1_3.0_1694566867523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_0_cased_da_20_en_5.1.1_3.0_1694566867523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_0_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_0_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_0_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-0-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_40_en.md new file mode 100644 index 00000000000000..7c957e0565e050 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_0_cased_da_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_0_cased_da_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_0_cased_da_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_0_cased_da_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_0_cased_da_40_en_5.1.1_3.0_1694567194862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_0_cased_da_40_en_5.1.1_3.0_1694567194862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_0_cased_da_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_0_cased_da_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_0_cased_da_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-0-cased-DA-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_2_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_2_cased_da_20_en.md new file mode 100644 index 00000000000000..9dc2a676cb3740 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_2_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_2_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_2_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_2_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_2_cased_da_20_en_5.1.1_3.0_1694567487345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_2_cased_da_20_en_5.1.1_3.0_1694567487345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_2_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_2_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_2_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-2-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_3_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_3_cased_da_20_en.md new file mode 100644 index 00000000000000..c5885ec396d33b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr100_3_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr100_3_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr100_3_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr100_3_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_3_cased_da_20_en_5.1.1_3.0_1694568354875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr100_3_cased_da_20_en_5.1.1_3.0_1694568354875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr100_3_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr100_3_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr100_3_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR100-3-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr10_0_cased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr10_0_cased_da_20_en.md new file mode 100644 index 00000000000000..6b434478dd0b6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_finetuned_lowr10_0_cased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_finetuned_lowr10_0_cased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_finetuned_lowr10_0_cased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_finetuned_lowr10_0_cased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr10_0_cased_da_20_en_5.1.1_3.0_1694566558054.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_finetuned_lowr10_0_cased_da_20_en_5.1.1_3.0_1694566558054.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_finetuned_lowr10_0_cased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_finetuned_lowr10_0_cased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_finetuned_lowr10_0_cased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-finetuned-lowR10-0-cased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_luciolrv_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_luciolrv_en.md new file mode 100644 index 00000000000000..fc2d653e3bd167 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_luciolrv_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_portuguese_lenerbr_luciolrv BertEmbeddings from luciolrv +author: John Snow Labs +name: bert_large_cased_portuguese_lenerbr_luciolrv +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_portuguese_lenerbr_luciolrv` is an English model originally trained by luciolrv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_luciolrv_en_5.1.1_3.0_1694584926995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_luciolrv_en_5.1.1_3.0_1694584926995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_portuguese_lenerbr_luciolrv","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_portuguese_lenerbr_luciolrv", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_portuguese_lenerbr_luciolrv| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/luciolrv/bert-large-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_pt.md new file mode 100644 index 00000000000000..c79af0651c7296 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bert_large_cased_portuguese_lenerbr BertEmbeddings from pierreguillou +author: John Snow Labs +name: bert_large_cased_portuguese_lenerbr +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_portuguese_lenerbr` is a Portuguese model originally trained by pierreguillou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563642184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_pt_5.1.1_3.0_1694563642184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_portuguese_lenerbr","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_portuguese_lenerbr", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_portuguese_lenerbr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|1.2 GB| + +## References + +https://huggingface.co/pierreguillou/bert-large-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_vittorio_girardi_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_vittorio_girardi_en.md new file mode 100644 index 00000000000000..2d00656cbff7e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_portuguese_lenerbr_vittorio_girardi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_portuguese_lenerbr_vittorio_girardi BertEmbeddings from vittorio-girardi +author: John Snow Labs +name: bert_large_cased_portuguese_lenerbr_vittorio_girardi +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_portuguese_lenerbr_vittorio_girardi` is an English model originally trained by vittorio-girardi. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_vittorio_girardi_en_5.1.1_3.0_1694635205835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_portuguese_lenerbr_vittorio_girardi_en_5.1.1_3.0_1694635205835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_portuguese_lenerbr_vittorio_girardi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_portuguese_lenerbr_vittorio_girardi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_portuguese_lenerbr_vittorio_girardi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/vittorio-girardi/bert-large-cased-pt-lenerbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_full_norwegian_label_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_full_norwegian_label_20_en.md new file mode 100644 index 00000000000000..fc968c1493760b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_full_norwegian_label_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_full_norwegian_label_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_full_norwegian_label_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_full_norwegian_label_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_full_norwegian_label_20_en_5.1.1_3.0_1694571475257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_full_norwegian_label_20_en_5.1.1_3.0_1694571475257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_full_norwegian_label_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_full_norwegian_label_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_full_norwegian_label_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-full-no-label-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_cased_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_cased_20_en.md new file mode 100644 index 00000000000000..2a6aeb41314249 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_cased_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr100_0_cased_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr100_0_cased_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr100_0_cased_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_0_cased_20_en_5.1.1_3.0_1694569625563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_0_cased_20_en_5.1.1_3.0_1694569625563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr100_0_cased_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr100_0_cased_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr100_0_cased_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR100-0-cased-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_prepend_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_prepend_40_en.md new file mode 100644 index 00000000000000..9714c08d2e10eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_0_prepend_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr100_0_prepend_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr100_0_prepend_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr100_0_prepend_40` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_0_prepend_40_en_5.1.1_3.0_1694570580287.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_0_prepend_40_en_5.1.1_3.0_1694570580287.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr100_0_prepend_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr100_0_prepend_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr100_0_prepend_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR100-0-prepend-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_150_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_150_en.md new file mode 100644 index 00000000000000..6d946f77f19d95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr100_1_cased_150 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr100_1_cased_150 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr100_1_cased_150` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_1_cased_150_en_5.1.1_3.0_1694569943900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_1_cased_150_en_5.1.1_3.0_1694569943900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr100_1_cased_150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr100_1_cased_150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr100_1_cased_150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR100-1-cased-150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_40_en.md new file mode 100644 index 00000000000000..15e5bc25cdec34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr100_1_cased_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr100_1_cased_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr100_1_cased_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr100_1_cased_40` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_1_cased_40_en_5.1.1_3.0_1694569306149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr100_1_cased_40_en_5.1.1_3.0_1694569306149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr100_1_cased_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr100_1_cased_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr100_1_cased_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR100-1-cased-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr10_1_prepend_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr10_1_prepend_20_en.md new file mode 100644 index 00000000000000..937bf1fbe4ffc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_lr10_1_prepend_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_lr10_1_prepend_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_lr10_1_prepend_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_lr10_1_prepend_20` is an English model originally trained by jojoUla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr10_1_prepend_20_en_5.1.1_3.0_1694570247498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_lr10_1_prepend_20_en_5.1.1_3.0_1694570247498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_lr10_1_prepend_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_lr10_1_prepend_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_lr10_1_prepend_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-LR10-1-prepend-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_en.md new file mode 100644 index 00000000000000..32011553115ddb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_en_5.1.1_3.0_1694571918630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_en_5.1.1_3.0_1694571918630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en.md new file mode 100644 index 00000000000000..ff2b3585327705 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en_5.1.1_3.0_1694572892853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30_en_5.1.1_3.0_1694572892853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_20_sigir_tune2nd_lr10_labelled_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-20-sigir-tune2nd-LR10-labelled-30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_en.md new file mode 100644 index 00000000000000..b5c2321004f89c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_en_5.1.1_3.0_1694572396505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_en_5.1.1_3.0_1694572396505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en.md new file mode 100644 index 00000000000000..c8a51f528afb3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en_5.1.1_3.0_1694573387926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30_en_5.1.1_3.0_1694573387926.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr100_labelled_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-40-sigir-tune2nd-LR100-labelled-30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40_en.md new file mode 100644 index 00000000000000..1077a6ead45398 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40_en_5.1.1_3.0_1694573805864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40_en_5.1.1_3.0_1694573805864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_norwegian_label_40_sigir_tune2nd_lr10_labelled_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-no-label-40-sigir-tune2nd-LR10-labelled-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_evidence_norwegian_label_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_evidence_norwegian_label_40_en.md new file mode 100644 index 00000000000000..6f531b6f584c91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_evidence_norwegian_label_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_evidence_norwegian_label_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_evidence_norwegian_label_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_evidence_norwegian_label_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_evidence_norwegian_label_40_en_5.1.1_3.0_1694575387153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_evidence_norwegian_label_40_en_5.1.1_3.0_1694575387153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_evidence_norwegian_label_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_evidence_norwegian_label_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_evidence_norwegian_label_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-evidence-no-label-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en.md new file mode 100644 index 00000000000000..42baa44b0f5b1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en_5.1.1_3.0_1694575697199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40_en_5.1.1_3.0_1694575697199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr100_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR100-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en.md new file mode 100644 index 00000000000000..8d51641eafa082 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en_5.1.1_3.0_1694587179778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0_en_5.1.1_3.0_1694587179778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-20-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en.md new file mode 100644 index 00000000000000..63c70aa648c253 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en_5.1.1_3.0_1694587498641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1_en_5.1.1_3.0_1694587498641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_20_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-20-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt_en.md new file mode 100644 index 00000000000000..21f9d653f98d1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt_en_5.1.1_3.0_1694583930419.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt_en_5.1.1_3.0_1694583930419.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_0_prompt| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40-0-prompt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1_en.md new file mode 100644 index 00000000000000..262851f34c3b21 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1_en_5.1.1_3.0_1694582866236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1_en_5.1.1_3.0_1694582866236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en.md new file mode 100644 index 00000000000000..aa4440ab6b4e39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en_5.1.1_3.0_1694583363515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2_en_5.1.1_3.0_1694583363515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en.md new file mode 100644 index 00000000000000..1f3705914e7257 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en_5.1.1_3.0_1694584252445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3_en_5.1.1_3.0_1694584252445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_en.md new file mode 100644 index 00000000000000..bd78c8deee6cda --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_en_5.1.1_3.0_1694578940076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40_en_5.1.1_3.0_1694578940076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0_en.md new file mode 100644 index 00000000000000..7c833d1b9264cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0_en_5.1.1_3.0_1694645453821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0_en_5.1.1_3.0_1694645453821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1_en.md new file mode 100644 index 00000000000000..6fda8b16781464 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1_en_5.1.1_3.0_1694646476945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1_en_5.1.1_3.0_1694646476945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2_en.md new file mode 100644 index 00000000000000..2a5c92934ce453 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2_en_5.1.1_3.0_1694588193598.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2_en_5.1.1_3.0_1694588193598.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3_en.md new file mode 100644 index 00000000000000..1a29158803d7ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3_en_5.1.1_3.0_1694588891046.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3_en_5.1.1_3.0_1694588891046.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4_en.md new file mode 100644 index 00000000000000..c72524aed9307d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4_en_5.1.1_3.0_1694589244932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4_en_5.1.1_3.0_1694589244932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en.md new file mode 100644 index 00000000000000..10d26077bb674b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en_5.1.1_3.0_1694569334144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20_en_5.1.1_3.0_1694569334144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21_en.md new file mode 100644 index 00000000000000..e2891bffbdaa6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21_en_5.1.1_3.0_1694569649905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21_en_5.1.1_3.0_1694569649905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_21| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-21 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22_en.md new file mode 100644 index 00000000000000..b3a053447a112c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22_en_5.1.1_3.0_1694569966201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22_en_5.1.1_3.0_1694569966201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_22| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-22 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en.md new file mode 100644 index 00000000000000..d6f9c2bf77517f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en_5.1.1_3.0_1694570314564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23_en_5.1.1_3.0_1694570314564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_23| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-23 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24_en.md new file mode 100644 index 00000000000000..28a34cb128ea8f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24_en_5.1.1_3.0_1694570634995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24_en_5.1.1_3.0_1694570634995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_24| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-24 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en.md new file mode 100644 index 00000000000000..aaedbbe999655e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en_5.1.1_3.0_1694570981130.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25_en_5.1.1_3.0_1694570981130.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_fast_25| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-fast-25 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10_en.md new file mode 100644 index 00000000000000..fe7aae847bd184 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10_en_5.1.1_3.0_1694573423912.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10_en_5.1.1_3.0_1694573423912.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11_en.md new file mode 100644 index 00000000000000..fedff5398add0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11_en_5.1.1_3.0_1694573761232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11_en_5.1.1_3.0_1694573761232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_11| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-11 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en.md new file mode 100644 index 00000000000000..49f1ac33dd7388 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en_5.1.1_3.0_1694574046482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12_en_5.1.1_3.0_1694574046482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en.md new file mode 100644 index 00000000000000..56968d95fd37a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en_5.1.1_3.0_1694574358005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13_en_5.1.1_3.0_1694574358005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_13| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en.md new file mode 100644 index 00000000000000..85029202afe2be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en_5.1.1_3.0_1694574703123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14_en_5.1.1_3.0_1694574703123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_14| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-14 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15_en.md new file mode 100644 index 00000000000000..9f62efd21eb560 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15_en_5.1.1_3.0_1694575016746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15_en_5.1.1_3.0_1694575016746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_15| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-15 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en.md new file mode 100644 index 00000000000000..3c74ff9fe82398 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en_5.1.1_3.0_1694575328626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16_en_5.1.1_3.0_1694575328626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_16| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-16 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en.md new file mode 100644 index 00000000000000..fbfc1cf4982f05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en_5.1.1_3.0_1694575626182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17_en_5.1.1_3.0_1694575626182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_17| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-17 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18_en.md new file mode 100644 index 00000000000000..32daae69f07e18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18_en_5.1.1_3.0_1694575920293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18_en_5.1.1_3.0_1694575920293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_18| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-18 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4_en.md new file mode 100644 index 00000000000000..c86bf1d18a8015 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4_en_5.1.1_3.0_1694571499195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4_en_5.1.1_3.0_1694571499195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5_en.md new file mode 100644 index 00000000000000..b555a847265302 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5_en_5.1.1_3.0_1694571831003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5_en_5.1.1_3.0_1694571831003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en.md new file mode 100644 index 00000000000000..78a13e0d225837 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en_5.1.1_3.0_1694572150639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6_en_5.1.1_3.0_1694572150639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7_en.md new file mode 100644 index 00000000000000..ef92da68f18d56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7_en_5.1.1_3.0_1694572454796.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7_en_5.1.1_3.0_1694572454796.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8_en.md new file mode 100644 index 00000000000000..74318af2756bbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8_en_5.1.1_3.0_1694572788274.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8_en_5.1.1_3.0_1694572788274.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_8| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en.md new file mode 100644 index 00000000000000..86b598b7a2c880 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en_5.1.1_3.0_1694573104931.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9_en_5.1.1_3.0_1694573104931.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr50_8_fast_9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR50-8-fast-9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_en.md new file mode 100644 index 00000000000000..11a6c25a169b43 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_sigir_support_refute_norwegian_label_40_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_en_5.1.1_3.0_1694575070771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_en_5.1.1_3.0_1694575070771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_whole_word_masking_finetuned_bert_mlm6_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_whole_word_masking_finetuned_bert_mlm6_en.md new file mode 100644 index 00000000000000..50264b05b27605 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_cased_whole_word_masking_finetuned_bert_mlm6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_whole_word_masking_finetuned_bert_mlm6 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_large_cased_whole_word_masking_finetuned_bert_mlm6 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_whole_word_masking_finetuned_bert_mlm6` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_finetuned_bert_mlm6_en_5.1.1_3.0_1694643215167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_whole_word_masking_finetuned_bert_mlm6_en_5.1.1_3.0_1694643215167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_whole_word_masking_finetuned_bert_mlm6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_whole_word_masking_finetuned_bert_mlm6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_whole_word_masking_finetuned_bert_mlm6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-large-cased-whole-word-masking-finetuned-BERT-mlm6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_contrastive_self_supervised_acl2020_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_contrastive_self_supervised_acl2020_en.md new file mode 100644 index 00000000000000..09d9d0127b41ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_contrastive_self_supervised_acl2020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_contrastive_self_supervised_acl2020 BertEmbeddings from sap-ai-research +author: John Snow Labs +name: bert_large_contrastive_self_supervised_acl2020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_contrastive_self_supervised_acl2020` is an English model originally trained by sap-ai-research. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_contrastive_self_supervised_acl2020_en_5.1.1_3.0_1694571008191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_contrastive_self_supervised_acl2020_en_5.1.1_3.0_1694571008191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_contrastive_self_supervised_acl2020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_contrastive_self_supervised_acl2020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_contrastive_self_supervised_acl2020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/sap-ai-research/BERT-Large-Contrastive-Self-Supervised-ACL2020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_nli_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_nli_en.md new file mode 100644 index 00000000000000..052fd12c99b54a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_nli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_nli BertEmbeddings from binwang +author: John Snow Labs +name: bert_large_nli +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_nli` is a English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_nli_en_5.1.1_3.0_1694588802340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_nli_en_5.1.1_3.0_1694588802340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_nli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_nli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_nli| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/binwang/bert-large-nli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_nli_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_nli_stsb_en.md new file mode 100644 index 00000000000000..d0d80f42a38572 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_nli_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_nli_stsb BertEmbeddings from binwang +author: John Snow Labs +name: bert_large_nli_stsb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_nli_stsb` is a English model originally trained by binwang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_nli_stsb_en_5.1.1_3.0_1694588441505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_nli_stsb_en_5.1.1_3.0_1694588441505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_nli_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_nli_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_nli_stsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/binwang/bert-large-nli-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_retrained_2_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_retrained_2_epochs_en.md new file mode 100644 index 00000000000000..2b808c2f7fab8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_retrained_2_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_retrained_2_epochs BertEmbeddings from trangdieu +author: John Snow Labs +name: bert_large_retrained_2_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_retrained_2_epochs` is a English model originally trained by trangdieu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_retrained_2_epochs_en_5.1.1_3.0_1694578433457.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_retrained_2_epochs_en_5.1.1_3.0_1694578433457.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_retrained_2_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_retrained_2_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_retrained_2_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/trangdieu/bert-large-retrained-2-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_retrained_4_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_retrained_4_epochs_en.md new file mode 100644 index 00000000000000..31407c22b6f5ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_retrained_4_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_retrained_4_epochs BertEmbeddings from trangdieu +author: John Snow Labs +name: bert_large_retrained_4_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_retrained_4_epochs` is a English model originally trained by trangdieu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_retrained_4_epochs_en_5.1.1_3.0_1694578721586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_retrained_4_epochs_en_5.1.1_3.0_1694578721586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_retrained_4_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_retrained_4_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_retrained_4_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/trangdieu/bert-large-retrained-4-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_150_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_150_en.md new file mode 100644 index 00000000000000..ea5af26fb79536 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_swedish_nordic_pile_150 BertEmbeddings from timpal0l +author: John Snow Labs +name: bert_large_swedish_nordic_pile_150 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_swedish_nordic_pile_150` is a English model originally trained by timpal0l. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_swedish_nordic_pile_150_en_5.1.1_3.0_1694579612852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_swedish_nordic_pile_150_en_5.1.1_3.0_1694579612852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_swedish_nordic_pile_150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_swedish_nordic_pile_150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_swedish_nordic_pile_150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/timpal0l/bert_large_sv_nordic_pile_150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_320_en.md new file mode 100644 index 00000000000000..549d1f84090c57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_nordic_pile_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_swedish_nordic_pile_320 BertEmbeddings from timpal0l +author: John Snow Labs +name: bert_large_swedish_nordic_pile_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_swedish_nordic_pile_320` is a English model originally trained by timpal0l. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_swedish_nordic_pile_320_en_5.1.1_3.0_1694583683417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_swedish_nordic_pile_320_en_5.1.1_3.0_1694583683417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_swedish_nordic_pile_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_swedish_nordic_pile_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_swedish_nordic_pile_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/timpal0l/bert_large_sv_nordic_pile_320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_uncased_en.md new file mode 100644 index 00000000000000..234d4adf88b1ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_swedish_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_swedish_uncased BertEmbeddings from af-ai-center +author: John Snow Labs +name: bert_large_swedish_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_swedish_uncased` is a English model originally trained by af-ai-center. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_swedish_uncased_en_5.1.1_3.0_1694577225450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_swedish_uncased_en_5.1.1_3.0_1694577225450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_swedish_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_swedish_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_swedish_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/af-ai-center/bert-large-swedish-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_bert_mlm5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_bert_mlm5_en.md new file mode 100644 index 00000000000000..0dd46dc6a2b92e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_bert_mlm5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_bert_mlm5 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_large_uncased_finetuned_bert_mlm5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_uncased_finetuned_bert_mlm5` is a English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_bert_mlm5_en_5.1.1_3.0_1694642005687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_bert_mlm5_en_5.1.1_3.0_1694642005687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_bert_mlm5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_bert_mlm5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_bert_mlm5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-large-uncased-finetuned-bert-mlm5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en.md new file mode 100644 index 00000000000000..94d90204495b8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_lowr100_4_uncased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_lowr100_4_uncased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_lowr100_4_uncased_da_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en_5.1.1_3.0_1694568669304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_4_uncased_da_20_en_5.1.1_3.0_1694568669304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_lowr100_4_uncased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_lowr100_4_uncased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_lowr100_4_uncased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-lowR100-4-uncased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en.md new file mode 100644 index 00000000000000..290ff49ccce50e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_finetuned_lowr100_5_uncased_da_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_uncased_finetuned_lowr100_5_uncased_da_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_finetuned_lowr100_5_uncased_da_20` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en_5.1.1_3.0_1694568976431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_finetuned_lowr100_5_uncased_da_20_en_5.1.1_3.0_1694568976431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_finetuned_lowr100_5_uncased_da_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_finetuned_lowr100_5_uncased_da_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_finetuned_lowr100_5_uncased_da_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-uncased-finetuned-lowR100-5-uncased-DA-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_sparse_80_1x4_block_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_sparse_80_1x4_block_pruneofa_en.md new file mode 100644 index 00000000000000..ed30eb4cde1dd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_sparse_80_1x4_block_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_sparse_80_1x4_block_pruneofa BertEmbeddings from Intel +author: John Snow Labs +name: bert_large_uncased_sparse_80_1x4_block_pruneofa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_sparse_80_1x4_block_pruneofa` is an English model originally trained by Intel.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sparse_80_1x4_block_pruneofa_en_5.1.1_3.0_1694621561165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_sparse_80_1x4_block_pruneofa_en_5.1.1_3.0_1694621561165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_sparse_80_1x4_block_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_sparse_80_1x4_block_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_sparse_80_1x4_block_pruneofa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|494.9 MB| + +## References + +https://huggingface.co/Intel/bert-large-uncased-sparse-80-1x4-block-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_whole_word_masking_finetuned_bert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_whole_word_masking_finetuned_bert_mlm_en.md new file mode 100644 index 00000000000000..cd6739e3038b81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_uncased_whole_word_masking_finetuned_bert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_uncased_whole_word_masking_finetuned_bert_mlm BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_large_uncased_whole_word_masking_finetuned_bert_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_uncased_whole_word_masking_finetuned_bert_mlm` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_uncased_whole_word_masking_finetuned_bert_mlm_en_5.1.1_3.0_1694641067297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_uncased_whole_word_masking_finetuned_bert_mlm_en_5.1.1_3.0_1694641067297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_uncased_whole_word_masking_finetuned_bert_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_uncased_whole_word_masking_finetuned_bert_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_uncased_whole_word_masking_finetuned_bert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-large-uncased-whole-word-masking-finetuned-bert-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_large_yc_recipe_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_large_yc_recipe_30_en.md new file mode 100644 index 00000000000000..4a7e7a1eef328a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_large_yc_recipe_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_yc_recipe_30 BertEmbeddings from CennetOguz +author: John Snow Labs +name: bert_large_yc_recipe_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_yc_recipe_30` is a English model originally trained by CennetOguz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_yc_recipe_30_en_5.1.1_3.0_1694568821432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_yc_recipe_30_en_5.1.1_3.0_1694568821432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_yc_recipe_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_yc_recipe_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_yc_recipe_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/CennetOguz/bert_large_yc_recipe_30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_medium_arabic_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_arabic_ar.md new file mode 100644 index 00000000000000..633e34076de7d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_arabic_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_medium_arabic BertEmbeddings from asafaya +author: John Snow Labs +name: bert_medium_arabic +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_medium_arabic` is an Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medium_arabic_ar_5.1.1_3.0_1694582427477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medium_arabic_ar_5.1.1_3.0_1694582427477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_medium_arabic","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_medium_arabic", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medium_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|157.2 MB| + +## References + +https://huggingface.co/asafaya/bert-medium-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_medium_arapoembert_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_arapoembert_en.md new file mode 100644 index 00000000000000..0a003e2ed46b26 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_arapoembert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_medium_arapoembert BertEmbeddings from faisalq +author: John Snow Labs +name: bert_medium_arapoembert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_medium_arapoembert` is a English model originally trained by faisalq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medium_arapoembert_en_5.1.1_3.0_1694614622989.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medium_arapoembert_en_5.1.1_3.0_1694614622989.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_medium_arapoembert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_medium_arapoembert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medium_arapoembert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|356.2 MB| + +## References + +https://huggingface.co/faisalq/bert-medium-arapoembert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_medium_historic_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_historic_multilingual_cased_xx.md new file mode 100644 index 00000000000000..abfbd0ff6933c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_historic_multilingual_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_medium_historic_multilingual_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_medium_historic_multilingual_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_medium_historic_multilingual_cased` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medium_historic_multilingual_cased_xx_5.1.1_3.0_1694598781999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medium_historic_multilingual_cased_xx_5.1.1_3.0_1694598781999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_medium_historic_multilingual_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_medium_historic_multilingual_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medium_historic_multilingual_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|156.8 MB| + +## References + +https://huggingface.co/dbmdz/bert-medium-historic-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_medium_mlsm_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_mlsm_en.md new file mode 100644 index 00000000000000..5395d4019074b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_mlsm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_medium_mlsm BertEmbeddings from SzegedAI +author: John Snow Labs +name: bert_medium_mlsm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_medium_mlsm` is a English model originally trained by SzegedAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medium_mlsm_en_5.1.1_3.0_1694598153281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medium_mlsm_en_5.1.1_3.0_1694598153281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_medium_mlsm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_medium_mlsm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medium_mlsm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|157.1 MB| + +## References + +https://huggingface.co/SzegedAI/bert-medium-mlsm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_medium_pretrained_on_squad_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_pretrained_on_squad_en.md new file mode 100644 index 00000000000000..ed9774d369d15a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_medium_pretrained_on_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_medium_pretrained_on_squad BertEmbeddings from anas-awadalla +author: John Snow Labs +name: bert_medium_pretrained_on_squad +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_medium_pretrained_on_squad` is a English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_medium_pretrained_on_squad_en_5.1.1_3.0_1694579855883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_medium_pretrained_on_squad_en_5.1.1_3.0_1694579855883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_medium_pretrained_on_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_medium_pretrained_on_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_medium_pretrained_on_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|154.2 MB| + +## References + +https://huggingface.co/anas-awadalla/bert-medium-pretrained-on-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_mini_arabic_ar.md b/docs/_posts/ahmedlone127/2023-09-13-bert_mini_arabic_ar.md new file mode 100644 index 00000000000000..0754572efb6391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_mini_arabic_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic bert_mini_arabic BertEmbeddings from asafaya +author: John Snow Labs +name: bert_mini_arabic +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_mini_arabic` is an Arabic model originally trained by asafaya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_arabic_ar_5.1.1_3.0_1694582500015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_arabic_ar_5.1.1_3.0_1694582500015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_mini_arabic","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_mini_arabic", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|43.3 MB| + +## References + +https://huggingface.co/asafaya/bert-mini-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_mini_historic_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_mini_historic_multilingual_cased_xx.md new file mode 100644 index 00000000000000..40be9370125ff1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_mini_historic_multilingual_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_mini_historic_multilingual_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_mini_historic_multilingual_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_mini_historic_multilingual_cased` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_mini_historic_multilingual_cased_xx_5.1.1_3.0_1694599073613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_mini_historic_multilingual_cased_xx_5.1.1_3.0_1694599073613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_mini_historic_multilingual_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_mini_historic_multilingual_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_mini_historic_multilingual_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|43.3 MB| + +## References + +https://huggingface.co/dbmdz/bert-mini-historic-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_model_bstad_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_model_bstad_en.md new file mode 100644 index 00000000000000..d81b3f1f1f4af1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_model_bstad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_model_bstad BertEmbeddings from bstad +author: John Snow Labs +name: bert_model_bstad +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_model_bstad` is a English model originally trained by bstad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_model_bstad_en_5.1.1_3.0_1694589393853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_model_bstad_en_5.1.1_3.0_1694589393853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_model_bstad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_model_bstad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_model_bstad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/bstad/bert-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_model_nyashavision22_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_model_nyashavision22_en.md new file mode 100644 index 00000000000000..3c902faad2fe87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_model_nyashavision22_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_model_nyashavision22 BertEmbeddings from NyashaVision22 +author: John Snow Labs +name: bert_model_nyashavision22 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_model_nyashavision22` is an English model originally trained by NyashaVision22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_model_nyashavision22_en_5.1.1_3.0_1694576589229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_model_nyashavision22_en_5.1.1_3.0_1694576589229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_model_nyashavision22","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_model_nyashavision22", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_model_nyashavision22| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/NyashaVision22/bert_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_multilingial_geolocation_prediction_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_multilingial_geolocation_prediction_en.md new file mode 100644 index 00000000000000..6a60d1b50a7c64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_multilingial_geolocation_prediction_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_multilingial_geolocation_prediction BertEmbeddings from k4tel +author: John Snow Labs +name: bert_multilingial_geolocation_prediction +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_multilingial_geolocation_prediction` is an English model originally trained by k4tel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_multilingial_geolocation_prediction_en_5.1.1_3.0_1694601831218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_multilingial_geolocation_prediction_en_5.1.1_3.0_1694601831218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_multilingial_geolocation_prediction","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_multilingial_geolocation_prediction", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_multilingial_geolocation_prediction| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|663.2 MB| + +## References + +https://huggingface.co/k4tel/bert-multilingial-geolocation-prediction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_nlp_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_nlp_en.md new file mode 100644 index 00000000000000..39e3cba70f042e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_nlp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_nlp BertEmbeddings from subbareddyiiit +author: John Snow Labs +name: bert_nlp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_nlp` is an English model originally trained by subbareddyiiit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_nlp_en_5.1.1_3.0_1694575401616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_nlp_en_5.1.1_3.0_1694575401616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_nlp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_nlp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_nlp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/subbareddyiiit/BERT-NLP \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_inf_corpus_v.1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_inf_corpus_v.1_en.md new file mode 100644 index 00000000000000..2ece59f5ffa5d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_inf_corpus_v.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_portuguese_inf_corpus_v.1 BertEmbeddings from ricardo-filho +author: John Snow Labs +name: bert_portuguese_inf_corpus_v.1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_inf_corpus_v.1` is an English model originally trained by ricardo-filho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_inf_corpus_v.1_en_5.1.1_3.0_1694568024545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_inf_corpus_v.1_en_5.1.1_3.0_1694568024545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_portuguese_inf_corpus_v.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_portuguese_inf_corpus_v.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_inf_corpus_v.1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ricardo-filho/BERT-pt-inf-corpus-v.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_corpus_v.1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_corpus_v.1_en.md new file mode 100644 index 00000000000000..85eb5332731eeb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_corpus_v.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_portuguese_institutional_corpus_v.1 BertEmbeddings from ricardo-filho +author: John Snow Labs +name: bert_portuguese_institutional_corpus_v.1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_institutional_corpus_v.1` is an English model originally trained by ricardo-filho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_corpus_v.1_en_5.1.1_3.0_1694568183725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_corpus_v.1_en_5.1.1_3.0_1694568183725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_portuguese_institutional_corpus_v.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_portuguese_institutional_corpus_v.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_institutional_corpus_v.1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ricardo-filho/BERT-pt-institutional-corpus-v.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_en.md new file mode 100644 index 00000000000000..7b1160dc5c975a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_portuguese_institutional_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_portuguese_institutional BertEmbeddings from ricardo-filho +author: John Snow Labs +name: bert_portuguese_institutional +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_portuguese_institutional` is an English model originally trained by ricardo-filho. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_en_5.1.1_3.0_1694568312532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_portuguese_institutional_en_5.1.1_3.0_1694568312532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_portuguese_institutional","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_portuguese_institutional", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_portuguese_institutional| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/ricardo-filho/BERT-pt-institutional \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pretrain_btk_mufi_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pretrain_btk_mufi_en.md new file mode 100644 index 00000000000000..65e9b63b26a662 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pretrain_btk_mufi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pretrain_btk_mufi BertEmbeddings from btk-mufi +author: John Snow Labs +name: bert_pretrain_btk_mufi +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pretrain_btk_mufi` is an English model originally trained by btk-mufi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pretrain_btk_mufi_en_5.1.1_3.0_1694589810242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pretrain_btk_mufi_en_5.1.1_3.0_1694589810242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pretrain_btk_mufi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pretrain_btk_mufi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pretrain_btk_mufi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/btk-mufi/bert-pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pretrain_onlydj96_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pretrain_onlydj96_en.md new file mode 100644 index 00000000000000..71165441c55e35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pretrain_onlydj96_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pretrain_onlydj96 BertEmbeddings from onlydj96 +author: John Snow Labs +name: bert_pretrain_onlydj96 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pretrain_onlydj96` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pretrain_onlydj96_en_5.1.1_3.0_1694629039142.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pretrain_onlydj96_en_5.1.1_3.0_1694629039142.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pretrain_onlydj96","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pretrain_onlydj96", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pretrain_onlydj96| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/onlydj96/bert_pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pretraining_gaudi_2_batch_size_32_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pretraining_gaudi_2_batch_size_32_en.md new file mode 100644 index 00000000000000..8936eb5fcbbbe8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pretraining_gaudi_2_batch_size_32_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pretraining_gaudi_2_batch_size_32 BertEmbeddings from regisss +author: John Snow Labs +name: bert_pretraining_gaudi_2_batch_size_32 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pretraining_gaudi_2_batch_size_32` is an English model originally trained by regisss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pretraining_gaudi_2_batch_size_32_en_5.1.1_3.0_1694646591739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pretraining_gaudi_2_batch_size_32_en_5.1.1_3.0_1694646591739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pretraining_gaudi_2_batch_size_32","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pretraining_gaudi_2_batch_size_32", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pretraining_gaudi_2_batch_size_32| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/regisss/bert-pretraining-gaudi-2-batch-size-32 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pretraining_gaudi_2_batch_size_64_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pretraining_gaudi_2_batch_size_64_en.md new file mode 100644 index 00000000000000..ad2d5da98c2987 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pretraining_gaudi_2_batch_size_64_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pretraining_gaudi_2_batch_size_64 BertEmbeddings from regisss +author: John Snow Labs +name: bert_pretraining_gaudi_2_batch_size_64 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pretraining_gaudi_2_batch_size_64` is an English model originally trained by regisss. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pretraining_gaudi_2_batch_size_64_en_5.1.1_3.0_1694648529845.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pretraining_gaudi_2_batch_size_64_en_5.1.1_3.0_1694648529845.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pretraining_gaudi_2_batch_size_64","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pretraining_gaudi_2_batch_size_64", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pretraining_gaudi_2_batch_size_64| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.3 MB| + +## References + +https://huggingface.co/regisss/bert-pretraining-gaudi-2-batch-size-64 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pt_laptop_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_laptop_en.md new file mode 100644 index 00000000000000..0d8598c932a835 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_laptop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pt_laptop BertEmbeddings from activebus +author: John Snow Labs +name: bert_pt_laptop +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pt_laptop` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pt_laptop_en_5.1.1_3.0_1694575945954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pt_laptop_en_5.1.1_3.0_1694575945954.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pt_laptop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pt_laptop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pt_laptop| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.4 MB| + +## References + +https://huggingface.co/activebus/BERT-PT_laptop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_pt_rest_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_rest_en.md new file mode 100644 index 00000000000000..e5a75c271c4afc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_pt_rest_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pt_rest BertEmbeddings from activebus +author: John Snow Labs +name: bert_pt_rest +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pt_rest` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pt_rest_en_5.1.1_3.0_1694576170608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pt_rest_en_5.1.1_3.0_1694576170608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pt_rest","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pt_rest", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pt_rest| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.4 MB| + +## References + +https://huggingface.co/activebus/BERT-PT_rest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_review_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_review_en.md new file mode 100644 index 00000000000000..481b86ea562085 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_review_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_review BertEmbeddings from activebus +author: John Snow Labs +name: bert_review +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_review` is an English model originally trained by activebus. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_review_en_5.1.1_3.0_1694576401917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_review_en_5.1.1_3.0_1694576401917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_review","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_review", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_review| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.5 MB| + +## References + +https://huggingface.co/activebus/BERT_Review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_semeval_env_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_semeval_env_en.md new file mode 100644 index 00000000000000..cf0aff26c2994a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_semeval_env_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_semeval_env BertEmbeddings from Indraa99 +author: John Snow Labs +name: bert_semeval_env +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_semeval_env` is an English model originally trained by Indraa99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_semeval_env_en_5.1.1_3.0_1694600701019.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_semeval_env_en_5.1.1_3.0_1694600701019.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_semeval_env","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_semeval_env", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_semeval_env| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Indraa99/bert_semeval_env \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_cord19_en.md new file mode 100644 index 00000000000000..0daf7b1a83b9d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_cord19 BertEmbeddings from NeuML +author: John Snow Labs +name: bert_small_cord19 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_cord19` is an English model originally trained by NeuML. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_cord19_en_5.1.1_3.0_1694569266803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_cord19_en_5.1.1_3.0_1694569266803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|130.5 MB| + +## References + +https://huggingface.co/NeuML/bert-small-cord19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_en.md new file mode 100644 index 00000000000000..d0fdcade4480da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finer_en_5.1.1_3.0_1694578634265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finer_en_5.1.1_3.0_1694578634265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_longer_en.md new file mode 100644 index 00000000000000..597ad6f196b6b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finer_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finer_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finer_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finer_longer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finer_longer_en_5.1.1_3.0_1694578725327.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finer_longer_en_5.1.1_3.0_1694578725327.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finer_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finer_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finer_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finer-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_en.md new file mode 100644 index 00000000000000..b8717811c8441f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_eurlex BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_eurlex +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_eurlex` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_eurlex_en_5.1.1_3.0_1694569896945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_eurlex_en_5.1.1_3.0_1694569896945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_eurlex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_eurlex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_eurlex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-eurlex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_longer_en.md new file mode 100644 index 00000000000000..f93de11cb3d516 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_eurlex_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_eurlex_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_eurlex_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_eurlex_longer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_eurlex_longer_en_5.1.1_3.0_1694569982036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_eurlex_longer_en_5.1.1_3.0_1694569982036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_eurlex_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_eurlex_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_eurlex_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-eurlex-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_en.md new file mode 100644 index 00000000000000..0370444f921f6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_finer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_finer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_finer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_en_5.1.1_3.0_1694577758798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_en_5.1.1_3.0_1694577758798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_finer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_finer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_finer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-finer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_longer10_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_longer10_en.md new file mode 100644 index 00000000000000..5bc0cf5e011fac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_finer_longer10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_finer_longer10 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_finer_longer10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_finer_longer10` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_longer10_en_5.1.1_3.0_1694577859721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_finer_longer10_en_5.1.1_3.0_1694577859721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_finer_longer10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_finer_longer10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_finer_longer10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-finer-longer10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts10train10val_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts10train10val_en.md new file mode 100644 index 00000000000000..074215c3c9423f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts10train10val_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_legal_contracts10train10val BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_legal_contracts10train10val +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_legal_contracts10train10val` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_contracts10train10val_en_5.1.1_3.0_1694569317652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_contracts10train10val_en_5.1.1_3.0_1694569317652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_legal_contracts10train10val","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_legal_contracts10train10val", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_legal_contracts10train10val| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-legal-contracts10train10val \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts_larger20_5_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts_larger20_5_1_en.md new file mode 100644 index 00000000000000..435aeeb1d244bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts_larger20_5_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_legal_contracts_larger20_5_1 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_legal_contracts_larger20_5_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_legal_contracts_larger20_5_1` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_contracts_larger20_5_1_en_5.1.1_3.0_1694572830320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_contracts_larger20_5_1_en_5.1.1_3.0_1694572830320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_legal_contracts_larger20_5_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_legal_contracts_larger20_5_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_legal_contracts_larger20_5_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-legal-contracts-larger20-5-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts_larger4010_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts_larger4010_en.md new file mode 100644 index 00000000000000..dfd10511e03d99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_contracts_larger4010_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_legal_contracts_larger4010 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_legal_contracts_larger4010 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_legal_contracts_larger4010` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_contracts_larger4010_en_5.1.1_3.0_1694572295418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_contracts_larger4010_en_5.1.1_3.0_1694572295418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_legal_contracts_larger4010","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_legal_contracts_larger4010", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_legal_contracts_larger4010| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-legal-contracts-larger4010 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_definitions_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_definitions_en.md new file mode 100644 index 00000000000000..ed5356442609bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_definitions_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_legal_definitions BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_legal_definitions +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_legal_definitions` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_definitions_en_5.1.1_3.0_1694568934992.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_definitions_en_5.1.1_3.0_1694568934992.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_legal_definitions","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_legal_definitions", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_legal_definitions| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-legal-definitions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_definitions_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_definitions_longer_en.md new file mode 100644 index 00000000000000..c3274873f0bb6c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_legal_definitions_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_legal_definitions_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_legal_definitions_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_legal_definitions_longer` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_definitions_longer_en_5.1.1_3.0_1694569019352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_legal_definitions_longer_en_5.1.1_3.0_1694569019352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_legal_definitions_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_legal_definitions_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_legal_definitions_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-legal-definitions-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed20_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed20_en.md new file mode 100644 index 00000000000000..7ba28d136dcab8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_parsed20 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_parsed20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_parsed20` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_parsed20_en_5.1.1_3.0_1694573762439.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_parsed20_en_5.1.1_3.0_1694573762439.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_parsed20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_parsed20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_parsed20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-parsed20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed_longer100_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed_longer100_en.md new file mode 100644 index 00000000000000..3049691293e264 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed_longer100_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_parsed_longer100 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_parsed_longer100 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_parsed_longer100` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_parsed_longer100_en_5.1.1_3.0_1694573963051.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_parsed_longer100_en_5.1.1_3.0_1694573963051.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_parsed_longer100","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_parsed_longer100", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_parsed_longer100| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-parsed-longer100 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed_longer50_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed_longer50_en.md new file mode 100644 index 00000000000000..5d895c836551c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_finetuned_parsed_longer50_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_finetuned_parsed_longer50 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_finetuned_parsed_longer50 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_finetuned_parsed_longer50` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_parsed_longer50_en_5.1.1_3.0_1694573838567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_finetuned_parsed_longer50_en_5.1.1_3.0_1694573838567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_finetuned_parsed_longer50","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_finetuned_parsed_longer50", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_finetuned_parsed_longer50| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-finetuned-parsed-longer50 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_historic_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_historic_multilingual_cased_xx.md new file mode 100644 index 00000000000000..0fe24dcc60b249 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_historic_multilingual_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_small_historic_multilingual_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_small_historic_multilingual_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_small_historic_multilingual_cased` is a Multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_historic_multilingual_cased_xx_5.1.1_3.0_1694599387528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_historic_multilingual_cased_xx_5.1.1_3.0_1694599387528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_historic_multilingual_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_historic_multilingual_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_historic_multilingual_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|109.5 MB| + +## References + +https://huggingface.co/dbmdz/bert-small-historic-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_nan_labels_500_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_nan_labels_500_en.md new file mode 100644 index 00000000000000..b6ca774d2c0695 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_nan_labels_500_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_nan_labels_500 BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_small_nan_labels_500 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_nan_labels_500` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_nan_labels_500_en_5.1.1_3.0_1694574048493.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_nan_labels_500_en_5.1.1_3.0_1694574048493.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_nan_labels_500","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_nan_labels_500", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_nan_labels_500| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/bert-small-nan-labels-500 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_small_pretrained_on_squad_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_small_pretrained_on_squad_en.md new file mode 100644 index 00000000000000..9c78abf0796c12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_small_pretrained_on_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_small_pretrained_on_squad BertEmbeddings from anas-awadalla +author: John Snow Labs +name: bert_small_pretrained_on_squad +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_small_pretrained_on_squad` is an English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_small_pretrained_on_squad_en_5.1.1_3.0_1694579945463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_small_pretrained_on_squad_en_5.1.1_3.0_1694579945463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_small_pretrained_on_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_small_pretrained_on_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_small_pretrained_on_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/anas-awadalla/bert-small-pretrained-on-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_telugu_23_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_telugu_23_en.md new file mode 100644 index 00000000000000..ce4153198ca85c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_telugu_23_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_telugu_23 BertEmbeddings from swadesh7 +author: John Snow Labs +name: bert_telugu_23 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_telugu_23` is an English model originally trained by swadesh7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_telugu_23_en_5.1.1_3.0_1694596933667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_telugu_23_en_5.1.1_3.0_1694596933667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_telugu_23","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_telugu_23", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_telugu_23| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.1 MB| + +## References + +https://huggingface.co/swadesh7/bert_telugu_23 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_legal_definitions_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_legal_definitions_en.md new file mode 100644 index 00000000000000..e30e903e9dfbb6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_legal_definitions_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tiny_finetuned_legal_definitions BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_tiny_finetuned_legal_definitions +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_legal_definitions` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_legal_definitions_en_5.1.1_3.0_1694564433910.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_legal_definitions_en_5.1.1_3.0_1694564433910.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_tiny_finetuned_legal_definitions","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tiny_finetuned_legal_definitions", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_legal_definitions| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/bert-tiny-finetuned-legal-definitions \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en.md new file mode 100644 index 00000000000000..f0ff6a6c7c3782 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tiny_finetuned_nan_labels_nepal_bhasa_longer BertEmbeddings from muhtasham +author: John Snow Labs +name: bert_tiny_finetuned_nan_labels_nepal_bhasa_longer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_finetuned_nan_labels_nepal_bhasa_longer` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en_5.1.1_3.0_1694565410527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_finetuned_nan_labels_nepal_bhasa_longer_en_5.1.1_3.0_1694565410527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_tiny_finetuned_nan_labels_nepal_bhasa_longer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tiny_finetuned_nan_labels_nepal_bhasa_longer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_finetuned_nan_labels_nepal_bhasa_longer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/bert-tiny-finetuned-nan-labels-new-longer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_historic_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_historic_multilingual_cased_xx.md new file mode 100644 index 00000000000000..4028b6eb87f7bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_historic_multilingual_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual bert_tiny_historic_multilingual_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_tiny_historic_multilingual_cased +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_historic_multilingual_cased` is a multilingual model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_historic_multilingual_cased_xx_5.1.1_3.0_1694599550183.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_historic_multilingual_cased_xx_5.1.1_3.0_1694599550183.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_tiny_historic_multilingual_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tiny_historic_multilingual_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_historic_multilingual_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|17.4 MB| + +## References + +https://huggingface.co/dbmdz/bert-tiny-historic-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_standard_bahasa_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_standard_bahasa_cased_en.md new file mode 100644 index 00000000000000..9d2f7d91d43067 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_tiny_standard_bahasa_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tiny_standard_bahasa_cased BertEmbeddings from mesolitica +author: John Snow Labs +name: bert_tiny_standard_bahasa_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tiny_standard_bahasa_cased` is an English model originally trained by mesolitica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tiny_standard_bahasa_cased_en_5.1.1_3.0_1694586694738.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tiny_standard_bahasa_cased_en_5.1.1_3.0_1694586694738.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_tiny_standard_bahasa_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_tiny_standard_bahasa_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tiny_standard_bahasa_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|61.5 MB| + +## References + +https://huggingface.co/mesolitica/bert-tiny-standard-bahasa-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_truncate_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_truncate_en.md new file mode 100644 index 00000000000000..ab00b088985163 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_truncate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_truncate BertEmbeddings from NasimB +author: John Snow Labs +name: bert_truncate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_truncate` is an English model originally trained by NasimB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_truncate_en_5.1.1_3.0_1694587157781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_truncate_en_5.1.1_3.0_1694587157781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_truncate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_truncate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_truncate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|400.3 MB| + +## References + +https://huggingface.co/NasimB/bert-truncate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_twitter_hashtag_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_twitter_hashtag_en.md new file mode 100644 index 00000000000000..8d629b57d1fe87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_twitter_hashtag_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_twitter_hashtag BertEmbeddings from vivianhuang88 +author: John Snow Labs +name: bert_twitter_hashtag +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_twitter_hashtag` is an English model originally trained by vivianhuang88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_twitter_hashtag_en_5.1.1_3.0_1694639330517.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_twitter_hashtag_en_5.1.1_3.0_1694639330517.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_twitter_hashtag","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_twitter_hashtag", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_twitter_hashtag| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/vivianhuang88/bert_twitter_hashtag \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_ucb_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_ucb_5_en.md new file mode 100644 index 00000000000000..0f4f477fd701a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_ucb_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_ucb_5 BertEmbeddings from Diegomejia +author: John Snow Labs +name: bert_ucb_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ucb_5` is an English model originally trained by Diegomejia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ucb_5_en_5.1.1_3.0_1694570353048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ucb_5_en_5.1.1_3.0_1694570353048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ucb_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ucb_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ucb_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Diegomejia/bert-ucb-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_10_h_512_a_8_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_10_h_512_a_8_cord19_200616_en.md new file mode 100644 index 00000000000000..5fac0d66ef244d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_10_h_512_a_8_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_10_h_512_a_8_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_10_h_512_a_8_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_10_h_512_a_8_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_10_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580662292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_10_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580662292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_10_h_512_a_8_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_10_h_512_a_8_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_10_h_512_a_8_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|177.4 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-10_H-512_A-8_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_128_a_2_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_128_a_2_cord19_200616_en.md new file mode 100644 index 00000000000000..64cf2614b5e33e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_128_a_2_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_2_h_128_a_2_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_2_h_128_a_2_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_2_h_128_a_2_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_128_a_2_cord19_200616_en_5.1.1_3.0_1694580732283.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_128_a_2_cord19_200616_en_5.1.1_3.0_1694580732283.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_2_h_128_a_2_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_2_h_128_a_2_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_2_h_128_a_2_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.6 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-2_H-128_A-2_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en.md new file mode 100644 index 00000000000000..5faa852e2c8546 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq BertEmbeddings from postbot +author: John Snow Labs +name: bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq` is an English model originally trained by postbot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en_5.1.1_3.0_1694575813152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq_en_5.1.1_3.0_1694575813152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_2_h_256_a_4_mlm_multi_emails_hq| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|35.9 MB| + +## References + +https://huggingface.co/postbot/bert_uncased_L-2_H-256_A-4-mlm-multi-emails-hq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_512_a_8_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_512_a_8_cord19_200616_en.md new file mode 100644 index 00000000000000..2ab2f5f66731e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_2_h_512_a_8_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_2_h_512_a_8_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_2_h_512_a_8_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_2_h_512_a_8_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580817170.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_2_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580817170.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_2_h_512_a_8_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_2_h_512_a_8_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_2_h_512_a_8_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|83.3 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-2_H-512_A-8_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_256_a_4_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_256_a_4_cord19_200616_en.md new file mode 100644 index 00000000000000..c970369a6154e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_256_a_4_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_4_h_256_a_4_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_4_h_256_a_4_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_4_h_256_a_4_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_256_a_4_cord19_200616_en_5.1.1_3.0_1694580897916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_256_a_4_cord19_200616_en_5.1.1_3.0_1694580897916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_4_h_256_a_4_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_4_h_256_a_4_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_4_h_256_a_4_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-4_H-256_A-4_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_512_a_8_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_512_a_8_cord19_200616_en.md new file mode 100644 index 00000000000000..0efa00cf262be3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_512_a_8_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_4_h_512_a_8_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_4_h_512_a_8_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_4_h_512_a_8_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580980693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_512_a_8_cord19_200616_en_5.1.1_3.0_1694580980693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_4_h_512_a_8_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_4_h_512_a_8_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_4_h_512_a_8_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-4_H-512_A-8_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_768_a_12_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_768_a_12_cord19_200616_en.md new file mode 100644 index 00000000000000..e07948da9a1681 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_4_h_768_a_12_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_4_h_768_a_12_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_4_h_768_a_12_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_4_h_768_a_12_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_768_a_12_cord19_200616_en_5.1.1_3.0_1694581077990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_4_h_768_a_12_cord19_200616_en_5.1.1_3.0_1694581077990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_4_h_768_a_12_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_4_h_768_a_12_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_4_h_768_a_12_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|194.4 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-4_H-768_A-12_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_6_h_128_a_2_cord19_200616_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_6_h_128_a_2_cord19_200616_en.md new file mode 100644 index 00000000000000..6045aee085b47f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_l_6_h_128_a_2_cord19_200616_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_l_6_h_128_a_2_cord19_200616 BertEmbeddings from aodiniz +author: John Snow Labs +name: bert_uncased_l_6_h_128_a_2_cord19_200616 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_l_6_h_128_a_2_cord19_200616` is an English model originally trained by aodiniz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_l_6_h_128_a_2_cord19_200616_en_5.1.1_3.0_1694581174056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_l_6_h_128_a_2_cord19_200616_en_5.1.1_3.0_1694581174056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_uncased_l_6_h_128_a_2_cord19_200616","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_l_6_h_128_a_2_cord19_200616", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_l_6_h_128_a_2_cord19_200616| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|19.6 MB| + +## References + +https://huggingface.co/aodiniz/bert_uncased_L-6_H-128_A-2_cord19-200616 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_2xthicc_multi_emails_hq_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_2xthicc_multi_emails_hq_en.md new file mode 100644 index 00000000000000..01094daa40f30b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_2xthicc_multi_emails_hq_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_tiny_2xthicc_multi_emails_hq BertEmbeddings from postbot +author: John Snow Labs +name: bert_uncased_tiny_2xthicc_multi_emails_hq +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_tiny_2xthicc_multi_emails_hq` is an English model originally trained by postbot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_2xthicc_multi_emails_hq_en_5.1.1_3.0_1694575965085.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_2xthicc_multi_emails_hq_en_5.1.1_3.0_1694575965085.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_uncased_tiny_2xthicc_multi_emails_hq","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_tiny_2xthicc_multi_emails_hq", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_tiny_2xthicc_multi_emails_hq| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|18.1 MB| + +## References + +https://huggingface.co/postbot/bert_uncased_tiny_2xthicc-multi-emails-hq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_multi_emails_hq_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_multi_emails_hq_en.md new file mode 100644 index 00000000000000..1bc9cd4fed9901 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_uncased_tiny_multi_emails_hq_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_uncased_tiny_multi_emails_hq BertEmbeddings from postbot +author: John Snow Labs +name: bert_uncased_tiny_multi_emails_hq +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_uncased_tiny_multi_emails_hq` is an English model originally trained by postbot. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_multi_emails_hq_en_5.1.1_3.0_1694575894615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_uncased_tiny_multi_emails_hq_en_5.1.1_3.0_1694575894615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_uncased_tiny_multi_emails_hq","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_uncased_tiny_multi_emails_hq", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_uncased_tiny_multi_emails_hq| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/postbot/bert_uncased_tiny-multi-emails-hq \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_web_bulgarian_bg.md b/docs/_posts/ahmedlone127/2023-09-13-bert_web_bulgarian_bg.md new file mode 100644 index 00000000000000..5dd4d50c9edd33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_web_bulgarian_bg.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Bulgarian bert_web_bulgarian BertEmbeddings from usmiva +author: John Snow Labs +name: bert_web_bulgarian +date: 2023-09-13 +tags: [bert, bg, open_source, fill_mask, onnx] +task: Embeddings +language: bg +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_web_bulgarian` is a Bulgarian model originally trained by usmiva. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_web_bulgarian_bg_5.1.1_3.0_1694590200921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_web_bulgarian_bg_5.1.1_3.0_1694590200921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_web_bulgarian","bg") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_web_bulgarian", "bg") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_web_bulgarian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|bg| +|Size:|406.9 MB| + +## References + +https://huggingface.co/usmiva/bert-web-bg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_wwm_words_law_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_wwm_words_law_en.md new file mode 100644 index 00000000000000..b6d165d851dc51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_wwm_words_law_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_wwm_words_law BertEmbeddings from ssbuild +author: John Snow Labs +name: bert_wwm_words_law +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_wwm_words_law` is an English model originally trained by ssbuild. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_wwm_words_law_en_5.1.1_3.0_1694583048594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_wwm_words_law_en_5.1.1_3.0_1694583048594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_wwm_words_law","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_wwm_words_law", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_wwm_words_law| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|424.2 MB| + +## References + +https://huggingface.co/ssbuild/bert_wwm_words_law \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_en.md new file mode 100644 index 00000000000000..20781554b69aa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_yelp BertEmbeddings from spicecloud +author: John Snow Labs +name: bert_yelp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_yelp` is an English model originally trained by spicecloud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_yelp_en_5.1.1_3.0_1694578158390.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_yelp_en_5.1.1_3.0_1694578158390.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_yelp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_yelp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_yelp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/spicecloud/bert-yelp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_local_en.md b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_local_en.md new file mode 100644 index 00000000000000..34590cb2b70605 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bert_yelp_local_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_yelp_local BertEmbeddings from spicecloud +author: John Snow Labs +name: bert_yelp_local +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_yelp_local` is an English model originally trained by spicecloud. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_yelp_local_en_5.1.1_3.0_1694578003743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_yelp_local_en_5.1.1_3.0_1694578003743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bert_yelp_local","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bert_yelp_local", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_yelp_local| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/spicecloud/bert-yelp-local \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertbek_news_big_cased_uz.md b/docs/_posts/ahmedlone127/2023-09-13-bertbek_news_big_cased_uz.md new file mode 100644 index 00000000000000..70e811cdbd63a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertbek_news_big_cased_uz.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Uzbek bertbek_news_big_cased BertEmbeddings from elmurod1202 +author: John Snow Labs +name: bertbek_news_big_cased +date: 2023-09-13 +tags: [bert, uz, open_source, fill_mask, onnx] +task: Embeddings +language: uz +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertbek_news_big_cased` is an Uzbek model originally trained by elmurod1202. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertbek_news_big_cased_uz_5.1.1_3.0_1694583891350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertbek_news_big_cased_uz_5.1.1_3.0_1694583891350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bertbek_news_big_cased","uz") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bertbek_news_big_cased", "uz") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertbek_news_big_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|uz| +|Size:|405.5 MB| + +## References + +https://huggingface.co/elmurod1202/bertbek-news-big-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertcl_en.md b/docs/_posts/ahmedlone127/2023-09-13-bertcl_en.md new file mode 100644 index 00000000000000..2a70f8c56413b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertcl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertcl BertEmbeddings from georgepu1 +author: John Snow Labs +name: bertcl +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertcl` is an English model originally trained by georgepu1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertcl_en_5.1.1_3.0_1694622288755.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertcl_en_5.1.1_3.0_1694622288755.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bertcl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bertcl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertcl| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/georgepu1/bertcl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertformalityclassificiation_en.md b/docs/_posts/ahmedlone127/2023-09-13-bertformalityclassificiation_en.md new file mode 100644 index 00000000000000..b0719c19dfc7ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertformalityclassificiation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertformalityclassificiation BertEmbeddings from qdenisq +author: John Snow Labs +name: bertformalityclassificiation +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertformalityclassificiation` is an English model originally trained by qdenisq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertformalityclassificiation_en_5.1.1_3.0_1694565715751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertformalityclassificiation_en_5.1.1_3.0_1694565715751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bertformalityclassificiation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bertformalityclassificiation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertformalityclassificiation| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/qdenisq/BertFormalityClassificiation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertimbau_base_finetuned_lener_breton_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bertimbau_base_finetuned_lener_breton_pt.md new file mode 100644 index 00000000000000..a5e45323acaf3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertimbau_base_finetuned_lener_breton_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertimbau_base_finetuned_lener_breton BertEmbeddings from Luciano +author: John Snow Labs +name: bertimbau_base_finetuned_lener_breton +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertimbau_base_finetuned_lener_breton` is a Portuguese model originally trained by Luciano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_base_finetuned_lener_breton_pt_5.1.1_3.0_1694580858415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_base_finetuned_lener_breton_pt_5.1.1_3.0_1694580858415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the Tokenizer stage produces the "token" column that BertEmbeddings reads. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("bertimbau_base_finetuned_lener_breton","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// NOTE: `data` is assumed to be a DataFrame with a "text" column (not defined in this snippet); the assembler writes "documents" and the Tokenizer writes "token", the two columns BertEmbeddings reads. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("bertimbau_base_finetuned_lener_breton", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_base_finetuned_lener_breton| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/Luciano/bertimbau-base-finetuned-lener-br \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertimbau_legal_pt.md b/docs/_posts/ahmedlone127/2023-09-13-bertimbau_legal_pt.md new file mode 100644 index 00000000000000..6bfe1d93037752 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertimbau_legal_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertimbau_legal BertEmbeddings from mynoguti +author: John Snow Labs +name: bertimbau_legal +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertimbau_legal` is a Portuguese model originally trained by mynoguti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_legal_pt_5.1.1_3.0_1694571645436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_legal_pt_5.1.1_3.0_1694571645436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertimbau_legal","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertimbau_legal", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau_legal| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/mynoguti/BERTimbau_Legal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertinho_galician_base_cased_gl.md b/docs/_posts/ahmedlone127/2023-09-13-bertinho_galician_base_cased_gl.md new file mode 100644 index 00000000000000..2d88a958aad3c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertinho_galician_base_cased_gl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Galician bertinho_galician_base_cased BertEmbeddings from dvilares +author: John Snow Labs +name: bertinho_galician_base_cased +date: 2023-09-13 +tags: [bert, gl, open_source, fill_mask, onnx] +task: Embeddings +language: gl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertinho_galician_base_cased` is a Galician model originally trained by dvilares. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertinho_galician_base_cased_gl_5.1.1_3.0_1694628723686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertinho_galician_base_cased_gl_5.1.1_3.0_1694628723686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertinho_galician_base_cased","gl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertinho_galician_base_cased", "gl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertinho_galician_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gl| +|Size:|405.3 MB| + +## References + +https://huggingface.co/dvilares/bertinho-gl-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertinho_galician_small_cased_gl.md b/docs/_posts/ahmedlone127/2023-09-13-bertinho_galician_small_cased_gl.md new file mode 100644 index 00000000000000..db394cb9c2da90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertinho_galician_small_cased_gl.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Galician bertinho_galician_small_cased BertEmbeddings from dvilares +author: John Snow Labs +name: bertinho_galician_small_cased +date: 2023-09-13 +tags: [bert, gl, open_source, fill_mask, onnx] +task: Embeddings +language: gl +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertinho_galician_small_cased` is a Galician model originally trained by dvilares. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertinho_galician_small_cased_gl_5.1.1_3.0_1694629051948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertinho_galician_small_cased_gl_5.1.1_3.0_1694629051948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertinho_galician_small_cased","gl") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertinho_galician_small_cased", "gl") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertinho_galician_small_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gl| +|Size:|245.8 MB| + +## References + +https://huggingface.co/dvilares/bertinho-gl-small-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly09_en.md b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly09_en.md new file mode 100644 index 00000000000000..39b35de7a1c1b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly09_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly09 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly09 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly09` is an English model originally trained by Jeska. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly09_en_5.1.1_3.0_1694563274040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly09_en_5.1.1_3.0_1694563274040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly09","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly09", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly09| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly09 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly128_en.md new file mode 100644 index 00000000000000..cb61fc45eb9617 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataallqonly128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataallqonly128 BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataallqonly128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataallqonly128` is an English model originally trained by Jeska. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly128_en_5.1.1_3.0_1694563422405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataallqonly128_en_5.1.1_3.0_1694563422405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataallqonly128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataallqonly128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataallqonly128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataALLQonly128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataqa20k_en.md b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataqa20k_en.md new file mode 100644 index 00000000000000..c0589fdb8f0967 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertjewdialdataqa20k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertjewdialdataqa20k BertEmbeddings from Jeska +author: John Snow Labs +name: bertjewdialdataqa20k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertjewdialdataqa20k` is an English model originally trained by Jeska. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertjewdialdataqa20k_en_5.1.1_3.0_1694563599531.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertjewdialdataqa20k_en_5.1.1_3.0_1694563599531.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertjewdialdataqa20k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertjewdialdataqa20k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertjewdialdataqa20k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/Jeska/BertjeWDialDataQA20k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bertu_mt.md b/docs/_posts/ahmedlone127/2023-09-13-bertu_mt.md new file mode 100644 index 00000000000000..31857f561ec391 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bertu_mt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Maltese bertu BertEmbeddings from MLRS +author: John Snow Labs +name: bertu +date: 2023-09-13 +tags: [bert, mt, open_source, fill_mask, onnx] +task: Embeddings +language: mt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertu` is a Maltese model originally trained by MLRS. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertu_mt_5.1.1_3.0_1694635154194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertu_mt_5.1.1_3.0_1694635154194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertu","mt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertu", "mt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mt| +|Size:|468.7 MB| + +## References + +https://huggingface.co/MLRS/BERTu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-beto_base_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-beto_base_cased_en.md new file mode 100644 index 00000000000000..f1be7bdb9a449d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-beto_base_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English beto_base_cased BertEmbeddings from espejelomar +author: John Snow Labs +name: beto_base_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beto_base_cased` is an English model originally trained by espejelomar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_base_cased_en_5.1.1_3.0_1694634951862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_base_cased_en_5.1.1_3.0_1694634951862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("beto_base_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("beto_base_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/espejelomar/beto-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-beto_chile_politico_1990_2019_es.md b/docs/_posts/ahmedlone127/2023-09-13-beto_chile_politico_1990_2019_es.md new file mode 100644 index 00000000000000..25b0dce4ddca9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-beto_chile_politico_1990_2019_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish beto_chile_politico_1990_2019 BertEmbeddings from lucas-valenzuela-everke +author: John Snow Labs +name: beto_chile_politico_1990_2019 +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `beto_chile_politico_1990_2019` is a Castilian, Spanish model originally trained by lucas-valenzuela-everke. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/beto_chile_politico_1990_2019_es_5.1.1_3.0_1694593619363.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/beto_chile_politico_1990_2019_es_5.1.1_3.0_1694593619363.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("beto_chile_politico_1990_2019","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("beto_chile_politico_1990_2019", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|beto_chile_politico_1990_2019| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.7 MB| + +## References + +https://huggingface.co/lucas-valenzuela-everke/BETO-chile-politico-1990-2019 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-betonews_bodycontext_en.md b/docs/_posts/ahmedlone127/2023-09-13-betonews_bodycontext_en.md new file mode 100644 index 00000000000000..0cde8eb199aa3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-betonews_bodycontext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English betonews_bodycontext BertEmbeddings from finiteautomata +author: John Snow Labs +name: betonews_bodycontext +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `betonews_bodycontext` is an English model originally trained by finiteautomata. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/betonews_bodycontext_en_5.1.1_3.0_1694638060106.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/betonews_bodycontext_en_5.1.1_3.0_1694638060106.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("betonews_bodycontext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("betonews_bodycontext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|betonews_bodycontext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.3 MB| + +## References + +https://huggingface.co/finiteautomata/betonews-bodycontext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-betonews_nonecontext_en.md b/docs/_posts/ahmedlone127/2023-09-13-betonews_nonecontext_en.md new file mode 100644 index 00000000000000..20047403ab7a2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-betonews_nonecontext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English betonews_nonecontext BertEmbeddings from finiteautomata +author: John Snow Labs +name: betonews_nonecontext +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `betonews_nonecontext` is an English model originally trained by finiteautomata. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/betonews_nonecontext_en_5.1.1_3.0_1694638603674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/betonews_nonecontext_en_5.1.1_3.0_1694638603674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("betonews_nonecontext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("betonews_nonecontext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|betonews_nonecontext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.0 MB| + +## References + +https://huggingface.co/finiteautomata/betonews-nonecontext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-betonews_tweetcontext_en.md b/docs/_posts/ahmedlone127/2023-09-13-betonews_tweetcontext_en.md new file mode 100644 index 00000000000000..62a20d9dfe8eb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-betonews_tweetcontext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English betonews_tweetcontext BertEmbeddings from piuba-bigdata +author: John Snow Labs +name: betonews_tweetcontext +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `betonews_tweetcontext` is an English model originally trained by piuba-bigdata. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/betonews_tweetcontext_en_5.1.1_3.0_1694639163920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/betonews_tweetcontext_en_5.1.1_3.0_1694639163920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("betonews_tweetcontext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, Tokenizer().setInputCols(["documents"]).setOutputCol("token"), embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("betonews_tweetcontext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, new Tokenizer().setInputCols("documents").setOutputCol("token"), embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|betonews_tweetcontext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/piuba-bigdata/betonews-tweetcontext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bibert_20_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-bibert_20_epochs_en.md new file mode 100644 index 00000000000000..6a39e1db9237cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bibert_20_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bibert_20_epochs BertEmbeddings from Embible +author: John Snow Labs +name: bibert_20_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bibert_20_epochs` is an English model originally trained by Embible. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bibert_20_epochs_en_5.1.1_3.0_1694563935647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bibert_20_epochs_en_5.1.1_3.0_1694563935647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bibert_20_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bibert_20_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bibert_20_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|432.5 MB| + +## References + +https://huggingface.co/Embible/bibert-20-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biblitbert_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-biblitbert_1_en.md new file mode 100644 index 00000000000000..8824d2a4111687 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biblitbert_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biblitbert_1 BertEmbeddings from vppvgit +author: John Snow Labs +name: biblitbert_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biblitbert_1` is an English model originally trained by vppvgit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biblitbert_1_en_5.1.1_3.0_1694582201983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biblitbert_1_en_5.1.1_3.0_1694582201983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biblitbert_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biblitbert_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biblitbert_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.7 MB| + +## References + +https://huggingface.co/vppvgit/BiblItBERT-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biblitbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-biblitbert_en.md new file mode 100644 index 00000000000000..0add19e1696bfe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biblitbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biblitbert BertEmbeddings from vppvgit +author: John Snow Labs +name: biblitbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biblitbert` is an English model originally trained by vppvgit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biblitbert_en_5.1.1_3.0_1694582340360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biblitbert_en_5.1.1_3.0_1694582340360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biblitbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biblitbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biblitbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.6 MB| + +## References + +https://huggingface.co/vppvgit/BiblItBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_emilyalsentzer_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_emilyalsentzer_en.md new file mode 100644 index 00000000000000..3de9706bf463b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_emilyalsentzer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_clinicalbert_emilyalsentzer BertEmbeddings from emilyalsentzer +author: John Snow Labs +name: bio_clinicalbert_emilyalsentzer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_clinicalbert_emilyalsentzer` is an English model originally trained by emilyalsentzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_emilyalsentzer_en_5.1.1_3.0_1694631294413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_emilyalsentzer_en_5.1.1_3.0_1694631294413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_clinicalbert_emilyalsentzer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_clinicalbert_emilyalsentzer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_clinicalbert_emilyalsentzer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/emilyalsentzer/Bio_ClinicalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_finetuning_data_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_finetuning_data_en.md new file mode 100644 index 00000000000000..53b2f8386e25a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_finetuning_data_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_clinicalbert_finetuning_data BertEmbeddings from Dinithi +author: John Snow Labs +name: bio_clinicalbert_finetuning_data +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_clinicalbert_finetuning_data` is an English model originally trained by Dinithi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_finetuning_data_en_5.1.1_3.0_1694589997412.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_finetuning_data_en_5.1.1_3.0_1694589997412.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_clinicalbert_finetuning_data","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_clinicalbert_finetuning_data", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_clinicalbert_finetuning_data| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/Dinithi/Bio_ClinicalBERT-finetuning-data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_surgicalcardiothoracic_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_surgicalcardiothoracic_en.md new file mode 100644 index 00000000000000..99e04d422764e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_clinicalbert_surgicalcardiothoracic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_clinicalbert_surgicalcardiothoracic BertEmbeddings from Gaborandi +author: John Snow Labs +name: bio_clinicalbert_surgicalcardiothoracic +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_clinicalbert_surgicalcardiothoracic` is an English model originally trained by Gaborandi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_surgicalcardiothoracic_en_5.1.1_3.0_1694620142173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_clinicalbert_surgicalcardiothoracic_en_5.1.1_3.0_1694620142173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_clinicalbert_surgicalcardiothoracic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_clinicalbert_surgicalcardiothoracic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_clinicalbert_surgicalcardiothoracic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.6 MB| + +## References + +https://huggingface.co/Gaborandi/Bio_ClinicalBERT-SurgicalCardiothoracic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_discharge_summary_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_discharge_summary_bert_en.md new file mode 100644 index 00000000000000..8a984e5c591181 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_discharge_summary_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_discharge_summary_bert BertEmbeddings from emilyalsentzer +author: John Snow Labs +name: bio_discharge_summary_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_discharge_summary_bert` is an English model originally trained by emilyalsentzer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_discharge_summary_bert_en_5.1.1_3.0_1694631653208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_discharge_summary_bert_en_5.1.1_3.0_1694631653208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_discharge_summary_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_discharge_summary_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_discharge_summary_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/emilyalsentzer/Bio_Discharge_Summary_BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_minialbert_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_minialbert_128_en.md new file mode 100644 index 00000000000000..735a049c1999b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_minialbert_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_minialbert_128 BertEmbeddings from nlpie +author: John Snow Labs +name: bio_minialbert_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_minialbert_128` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_minialbert_128_en_5.1.1_3.0_1694568055117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_minialbert_128_en_5.1.1_3.0_1694568055117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_minialbert_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_minialbert_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_minialbert_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| + +## References + +https://huggingface.co/nlpie/bio-miniALBERT-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bio_tinybert_en.md b/docs/_posts/ahmedlone127/2023-09-13-bio_tinybert_en.md new file mode 100644 index 00000000000000..a25dbbbc345763 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bio_tinybert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bio_tinybert BertEmbeddings from nlpie +author: John Snow Labs +name: bio_tinybert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio_tinybert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bio_tinybert_en_5.1.1_3.0_1694577599069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bio_tinybert_en_5.1.1_3.0_1694577599069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bio_tinybert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bio_tinybert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bio_tinybert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|53.8 MB| + +## References + +https://huggingface.co/nlpie/bio-tinybert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobert_base_1.2_en.md b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_1.2_en.md new file mode 100644 index 00000000000000..37f3e529a1179b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_1.2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_base_1.2 BertEmbeddings from abnuel +author: John Snow Labs +name: biobert_base_1.2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_base_1.2` is an English model originally trained by abnuel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_base_1.2_en_5.1.1_3.0_1694573075905.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_base_1.2_en_5.1.1_3.0_1694573075905.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_base_1.2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_base_1.2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_base_1.2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/abnuel/biobert-base_1.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobert_base_cased_v1.2_en.md b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_cased_v1.2_en.md new file mode 100644 index 00000000000000..483e34d57ef733 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_cased_v1.2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_base_cased_v1.2 BertEmbeddings from dmis-lab +author: John Snow Labs +name: biobert_base_cased_v1.2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_base_cased_v1.2` is an English model originally trained by dmis-lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_base_cased_v1.2_en_5.1.1_3.0_1694625923823.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_base_cased_v1.2_en_5.1.1_3.0_1694625923823.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_base_cased_v1.2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_base_cased_v1.2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_base_cased_v1.2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/dmis-lab/biobert-base-cased-v1.2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobert_base_cased_v1.2_finetuned_smpc_en.md b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_cased_v1.2_finetuned_smpc_en.md new file mode 100644 index 00000000000000..c46a71da294d18 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobert_base_cased_v1.2_finetuned_smpc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_base_cased_v1.2_finetuned_smpc BertEmbeddings from sophy +author: John Snow Labs +name: biobert_base_cased_v1.2_finetuned_smpc +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_base_cased_v1.2_finetuned_smpc` is an English model originally trained by sophy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_base_cased_v1.2_finetuned_smpc_en_5.1.1_3.0_1694635769664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_base_cased_v1.2_finetuned_smpc_en_5.1.1_3.0_1694635769664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobert_base_cased_v1.2_finetuned_smpc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobert_base_cased_v1.2_finetuned_smpc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_base_cased_v1.2_finetuned_smpc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/sophy/biobert-base-cased-v1.2-finetuned-smpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobert_giotto_en.md b/docs/_posts/ahmedlone127/2023-09-13-biobert_giotto_en.md new file mode 100644 index 00000000000000..797b623dfe1836 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobert_giotto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biobert_giotto BertEmbeddings from dpalominop +author: John Snow Labs +name: biobert_giotto +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobert_giotto` is an English model originally trained by dpalominop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobert_giotto_en_5.1.1_3.0_1694626974873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobert_giotto_en_5.1.1_3.0_1694626974873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("biobert_giotto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("biobert_giotto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobert_giotto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/dpalominop/biobert-giotto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobertpt_all_pt.md b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_all_pt.md new file mode 100644 index 00000000000000..4c949a0e8a3ce2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_all_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese biobertpt_all BertEmbeddings from pucpr +author: John Snow Labs +name: biobertpt_all +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobertpt_all` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobertpt_all_pt_5.1.1_3.0_1694564346908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobertpt_all_pt_5.1.1_3.0_1694564346908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("biobertpt_all","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("biobertpt_all", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobertpt_all| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|664.8 MB| + +## References + +https://huggingface.co/pucpr/biobertpt-all \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobertpt_bio_pt.md b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_bio_pt.md new file mode 100644 index 00000000000000..a99ea40f5089cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_bio_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese biobertpt_bio BertEmbeddings from pucpr +author: John Snow Labs +name: biobertpt_bio +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobertpt_bio` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobertpt_bio_pt_5.1.1_3.0_1694564511348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobertpt_bio_pt_5.1.1_3.0_1694564511348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("biobertpt_bio","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("biobertpt_bio", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobertpt_bio| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|665.0 MB| + +## References + +https://huggingface.co/pucpr/biobertpt-bio \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biobertpt_clin_pt.md b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_clin_pt.md new file mode 100644 index 00000000000000..f1500d423fab4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biobertpt_clin_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese biobertpt_clin BertEmbeddings from pucpr +author: John Snow Labs +name: biobertpt_clin +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobertpt_clin` is a Portuguese model originally trained by pucpr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobertpt_clin_pt_5.1.1_3.0_1694564736648.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobertpt_clin_pt_5.1.1_3.0_1694564736648.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("biobertpt_clin","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("biobertpt_clin", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobertpt_clin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|665.0 MB| + +## References + +https://huggingface.co/pucpr/biobertpt-clin \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bioformer_16l_en.md b/docs/_posts/ahmedlone127/2023-09-13-bioformer_16l_en.md new file mode 100644 index 00000000000000..fadb7c01fd2fbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bioformer_16l_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioformer_16l BertEmbeddings from bioformers +author: John Snow Labs +name: bioformer_16l +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioformer_16l` is an English model originally trained by bioformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioformer_16l_en_5.1.1_3.0_1694566235721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioformer_16l_en_5.1.1_3.0_1694566235721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("bioformer_16l","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("bioformer_16l", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioformer_16l| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|155.3 MB| + +## References + +https://huggingface.co/bioformers/bioformer-16L \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bioformer_8l_en.md b/docs/_posts/ahmedlone127/2023-09-13-bioformer_8l_en.md new file mode 100644 index 00000000000000..f46bd2eebccb21 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bioformer_8l_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioformer_8l BertEmbeddings from bioformers +author: John Snow Labs +name: bioformer_8l +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioformer_8l` is an English model originally trained by bioformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioformer_8l_en_5.1.1_3.0_1694588927608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioformer_8l_en_5.1.1_3.0_1694588927608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("bioformer_8l","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("bioformer_8l", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioformer_8l| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|158.5 MB| + +## References + +https://huggingface.co/bioformers/bioformer-8L \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bioformer_litcovid_en.md b/docs/_posts/ahmedlone127/2023-09-13-bioformer_litcovid_en.md new file mode 100644 index 00000000000000..612f8da6ab8354 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bioformer_litcovid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioformer_litcovid BertEmbeddings from bioformers +author: John Snow Labs +name: bioformer_litcovid +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioformer_litcovid` is an English model originally trained by bioformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioformer_litcovid_en_5.1.1_3.0_1694626519703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioformer_litcovid_en_5.1.1_3.0_1694626519703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("bioformer_litcovid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("bioformer_litcovid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioformer_litcovid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|158.5 MB| + +## References + +https://huggingface.co/bioformers/bioformer-litcovid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biomedical_en.md b/docs/_posts/ahmedlone127/2023-09-13-biomedical_en.md new file mode 100644 index 00000000000000..b913be3cc32eba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biomedical_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biomedical BertEmbeddings from ajitrajasekharan +author: John Snow Labs +name: biomedical +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomedical` is an English model originally trained by ajitrajasekharan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedical_en_5.1.1_3.0_1694577740537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedical_en_5.1.1_3.0_1694577740537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("biomedical","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("biomedical", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedical| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/ajitrajasekharan/biomedical \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bioptimus_en.md b/docs/_posts/ahmedlone127/2023-09-13-bioptimus_en.md new file mode 100644 index 00000000000000..9338e76033d79b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bioptimus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bioptimus BertEmbeddings from rttl-ai +author: John Snow Labs +name: bioptimus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bioptimus` is an English model originally trained by rttl-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bioptimus_en_5.1.1_3.0_1694587762594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bioptimus_en_5.1.1_3.0_1694587762594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("bioptimus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("bioptimus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bioptimus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/rttl-ai/BIOptimus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-biovocabbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-biovocabbert_en.md new file mode 100644 index 00000000000000..c27bf67c5f0b7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-biovocabbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biovocabbert BertEmbeddings from osunlp +author: John Snow Labs +name: biovocabbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biovocabbert` is an English model originally trained by osunlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biovocabbert_en_5.1.1_3.0_1694577552564.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biovocabbert_en_5.1.1_3.0_1694577552564.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("biovocabbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("biovocabbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biovocabbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|352.0 MB| + +## References + +https://huggingface.co/osunlp/BioVocabBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-blade_english_chinese_en.md b/docs/_posts/ahmedlone127/2023-09-13-blade_english_chinese_en.md new file mode 100644 index 00000000000000..a5ca8f67175913 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-blade_english_chinese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English blade_english_chinese BertEmbeddings from srnair +author: John Snow Labs +name: blade_english_chinese +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `blade_english_chinese` is an English model originally trained by srnair. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/blade_english_chinese_en_5.1.1_3.0_1694615597365.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/blade_english_chinese_en_5.1.1_3.0_1694615597365.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("blade_english_chinese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("blade_english_chinese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|blade_english_chinese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.7 MB| + +## References + +https://huggingface.co/srnair/blade-en-zh \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-blade_english_russian_en.md b/docs/_posts/ahmedlone127/2023-09-13-blade_english_russian_en.md new file mode 100644 index 00000000000000..479b5c207277dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-blade_english_russian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English blade_english_russian BertEmbeddings from srnair +author: John Snow Labs +name: blade_english_russian +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `blade_english_russian` is an English model originally trained by srnair. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/blade_english_russian_en_5.1.1_3.0_1694620469182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/blade_english_russian_en_5.1.1_3.0_1694620469182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("blade_english_russian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("blade_english_russian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|blade_english_russian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|428.2 MB| + +## References + +https://huggingface.co/srnair/blade-en-ru \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-bnbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-bnbert_en.md new file mode 100644 index 00000000000000..06ab195de4a6c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-bnbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bnbert BertEmbeddings from KamrusSamad +author: John Snow Labs +name: bnbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bnbert` is an English model originally trained by KamrusSamad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bnbert_en_5.1.1_3.0_1694564272370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bnbert_en_5.1.1_3.0_1694564272370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # produces the "token" column read by BertEmbeddings + +embeddings = BertEmbeddings.pretrained("bnbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("bnbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bnbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|53.8 MB| + +## References + +https://huggingface.co/KamrusSamad/bnbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-btfhbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-btfhbert_en.md new file mode 100644 index 00000000000000..f733c2d13b55ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-btfhbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English btfhbert BertEmbeddings from hsc748NLP +author: John Snow Labs +name: btfhbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `btfhbert` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/btfhbert_en_5.1.1_3.0_1694599994851.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/btfhbert_en_5.1.1_3.0_1694599994851.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("btfhbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("btfhbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|btfhbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/hsc748NLP/BtfhBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_eli5_mlm_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_eli5_mlm_model_en.md new file mode 100644 index 00000000000000..5bab73d8dc62fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_eli5_mlm_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_awesome_eli5_mlm_model BertEmbeddings from JackWolfard +author: John Snow Labs +name: burmese_awesome_eli5_mlm_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_eli5_mlm_model` is an English model originally trained by JackWolfard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_eli5_mlm_model_en_5.1.1_3.0_1694583256576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_eli5_mlm_model_en_5.1.1_3.0_1694583256576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("burmese_awesome_eli5_mlm_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_awesome_eli5_mlm_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_eli5_mlm_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|523.9 KB| + +## References + +https://huggingface.co/JackWolfard/my_awesome_eli5_mlm_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_mlm_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_mlm_model_en.md new file mode 100644 index 00000000000000..dd73175446486a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_mlm_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_awesome_mlm_model BertEmbeddings from wajdii +author: John Snow Labs +name: burmese_awesome_mlm_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_mlm_model` is an English model originally trained by wajdii. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_mlm_model_en_5.1.1_3.0_1694618818921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_mlm_model_en_5.1.1_3.0_1694618818921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("burmese_awesome_mlm_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_awesome_mlm_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_mlm_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/wajdii/my_awesome_mlm_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_model_alexyalunin_en.md b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_model_alexyalunin_en.md new file mode 100644 index 00000000000000..7a02b6b052b48a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-burmese_awesome_model_alexyalunin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_awesome_model_alexyalunin BertEmbeddings from alexyalunin +author: John Snow Labs +name: burmese_awesome_model_alexyalunin +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_awesome_model_alexyalunin` is an English model originally trained by alexyalunin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_alexyalunin_en_5.1.1_3.0_1694578520145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_awesome_model_alexyalunin_en_5.1.1_3.0_1694578520145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("burmese_awesome_model_alexyalunin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_awesome_model_alexyalunin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_awesome_model_alexyalunin| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.3 MB| + +## References + +https://huggingface.co/alexyalunin/my-awesome-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_en.md b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_en.md new file mode 100644 index 00000000000000..411a6b8bd0f3a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English carlbert_webex BertEmbeddings from aditeyabaral +author: John Snow Labs +name: carlbert_webex +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `carlbert_webex` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/carlbert_webex_en_5.1.1_3.0_1694602888545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/carlbert_webex_en_5.1.1_3.0_1694602888545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("carlbert_webex","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("carlbert_webex", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|carlbert_webex| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/aditeyabaral/carlbert-webex \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_aditeyabaral_en.md b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_aditeyabaral_en.md new file mode 100644 index 00000000000000..824c548082c3a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_aditeyabaral_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English carlbert_webex_mlm_aditeyabaral BertEmbeddings from aditeyabaral +author: John Snow Labs +name: carlbert_webex_mlm_aditeyabaral +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `carlbert_webex_mlm_aditeyabaral` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_aditeyabaral_en_5.1.1_3.0_1694606195386.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_aditeyabaral_en_5.1.1_3.0_1694606195386.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("carlbert_webex_mlm_aditeyabaral","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("carlbert_webex_mlm_aditeyabaral", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|carlbert_webex_mlm_aditeyabaral| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/aditeyabaral/carlbert-webex-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_spatial_en.md b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_spatial_en.md new file mode 100644 index 00000000000000..cf439778d38e8a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_spatial_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English carlbert_webex_mlm_spatial BertEmbeddings from aditeyabaral +author: John Snow Labs +name: carlbert_webex_mlm_spatial +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `carlbert_webex_mlm_spatial` is an English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_spatial_en_5.1.1_3.0_1694604986013.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_spatial_en_5.1.1_3.0_1694604986013.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("carlbert_webex_mlm_spatial","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("carlbert_webex_mlm_spatial", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|carlbert_webex_mlm_spatial| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/aditeyabaral/carlbert-webex-mlm-spatial \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_vignesh95_en.md b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_vignesh95_en.md new file mode 100644 index 00000000000000..a4f09ad74f6873 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_vignesh95_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English carlbert_webex_mlm_vignesh95 BertEmbeddings from Vignesh95 +author: John Snow Labs +name: carlbert_webex_mlm_vignesh95 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `carlbert_webex_mlm_vignesh95` is an English model originally trained by Vignesh95. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_vignesh95_en_5.1.1_3.0_1694616371291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_vignesh95_en_5.1.1_3.0_1694616371291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("carlbert_webex_mlm_vignesh95","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("carlbert_webex_mlm_vignesh95", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|carlbert_webex_mlm_vignesh95| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Vignesh95/carlbert-webex-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_wolof_recipient_en.md b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_wolof_recipient_en.md new file mode 100644 index 00000000000000..68710b1f4dc39a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-carlbert_webex_mlm_wolof_recipient_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English carlbert_webex_mlm_wolof_recipient BertEmbeddings from Vignesh95 +author: John Snow Labs +name: carlbert_webex_mlm_wolof_recipient +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `carlbert_webex_mlm_wolof_recipient` is an English model originally trained by Vignesh95. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_wolof_recipient_en_5.1.1_3.0_1694616716297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/carlbert_webex_mlm_wolof_recipient_en_5.1.1_3.0_1694616716297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("carlbert_webex_mlm_wolof_recipient","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("carlbert_webex_mlm_wolof_recipient", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|carlbert_webex_mlm_wolof_recipient| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Vignesh95/carlbert-webex-mlm-wo-recipient \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_bert_cloth_en.md b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_bert_cloth_en.md new file mode 100644 index 00000000000000..1448868f11e46b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_bert_cloth_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cdgp_chilean_sign_language_bert_cloth BertEmbeddings from AndyChiang +author: John Snow Labs +name: cdgp_chilean_sign_language_bert_cloth +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cdgp_chilean_sign_language_bert_cloth` is an English model originally trained by AndyChiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_bert_cloth_en_5.1.1_3.0_1694592903470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_bert_cloth_en_5.1.1_3.0_1694592903470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("cdgp_chilean_sign_language_bert_cloth","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cdgp_chilean_sign_language_bert_cloth", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cdgp_chilean_sign_language_bert_cloth| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AndyChiang/cdgp-csg-bert-cloth \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_bert_dgen_en.md b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_bert_dgen_en.md new file mode 100644 index 00000000000000..ce5acdfbd83008 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_bert_dgen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cdgp_chilean_sign_language_bert_dgen BertEmbeddings from AndyChiang +author: John Snow Labs +name: cdgp_chilean_sign_language_bert_dgen +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cdgp_chilean_sign_language_bert_dgen` is an English model originally trained by AndyChiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_bert_dgen_en_5.1.1_3.0_1694593050555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_bert_dgen_en_5.1.1_3.0_1694593050555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("cdgp_chilean_sign_language_bert_dgen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cdgp_chilean_sign_language_bert_dgen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cdgp_chilean_sign_language_bert_dgen| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AndyChiang/cdgp-csg-bert-dgen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_scibert_cloth_en.md b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_scibert_cloth_en.md new file mode 100644 index 00000000000000..1faa563f14cc99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_scibert_cloth_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cdgp_chilean_sign_language_scibert_cloth BertEmbeddings from AndyChiang +author: John Snow Labs +name: cdgp_chilean_sign_language_scibert_cloth +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cdgp_chilean_sign_language_scibert_cloth` is an English model originally trained by AndyChiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_scibert_cloth_en_5.1.1_3.0_1694593597348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_scibert_cloth_en_5.1.1_3.0_1694593597348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cdgp_chilean_sign_language_scibert_cloth","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cdgp_chilean_sign_language_scibert_cloth", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cdgp_chilean_sign_language_scibert_cloth| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/AndyChiang/cdgp-csg-scibert-cloth \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_scibert_dgen_en.md b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_scibert_dgen_en.md new file mode 100644 index 00000000000000..aff679a8f5bb12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cdgp_chilean_sign_language_scibert_dgen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cdgp_chilean_sign_language_scibert_dgen BertEmbeddings from AndyChiang +author: John Snow Labs +name: cdgp_chilean_sign_language_scibert_dgen +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cdgp_chilean_sign_language_scibert_dgen` is an English model originally trained by AndyChiang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_scibert_dgen_en_5.1.1_3.0_1694593731248.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cdgp_chilean_sign_language_scibert_dgen_en_5.1.1_3.0_1694593731248.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cdgp_chilean_sign_language_scibert_dgen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cdgp_chilean_sign_language_scibert_dgen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cdgp_chilean_sign_language_scibert_dgen| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/AndyChiang/cdgp-csg-scibert-dgen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chefberto_italian_cased_it.md b/docs/_posts/ahmedlone127/2023-09-13-chefberto_italian_cased_it.md new file mode 100644 index 00000000000000..89b3dbd0bf80a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chefberto_italian_cased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian chefberto_italian_cased BertEmbeddings from vinhood +author: John Snow Labs +name: chefberto_italian_cased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chefberto_italian_cased` is an Italian model originally trained by vinhood. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chefberto_italian_cased_it_5.1.1_3.0_1694581754685.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chefberto_italian_cased_it_5.1.1_3.0_1694581754685.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chefberto_italian_cased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chefberto_italian_cased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chefberto_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|412.6 MB| + +## References + +https://huggingface.co/vinhood/chefberto-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_en.md new file mode 100644 index 00000000000000..8fcb986dd8d999 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chemical_bert_uncased BertEmbeddings from recobo +author: John Snow Labs +name: chemical_bert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chemical_bert_uncased` is an English model originally trained by recobo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_en_5.1.1_3.0_1694566856839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_en_5.1.1_3.0_1694566856839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chemical_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chemical_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemical_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/recobo/chemical-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_c1_cust_en.md b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_c1_cust_en.md new file mode 100644 index 00000000000000..11148203990f4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_c1_cust_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chemical_bert_uncased_finetuned_cust_c1_cust BertEmbeddings from shafin +author: John Snow Labs +name: chemical_bert_uncased_finetuned_cust_c1_cust +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chemical_bert_uncased_finetuned_cust_c1_cust` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_finetuned_cust_c1_cust_en_5.1.1_3.0_1694630083309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_finetuned_cust_c1_cust_en_5.1.1_3.0_1694630083309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chemical_bert_uncased_finetuned_cust_c1_cust","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chemical_bert_uncased_finetuned_cust_c1_cust", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemical_bert_uncased_finetuned_cust_c1_cust| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/shafin/chemical-bert-uncased-finetuned-cust-c1-cust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_c2_en.md b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_c2_en.md new file mode 100644 index 00000000000000..290f7b4d47d88f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_c2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chemical_bert_uncased_finetuned_cust_c2 BertEmbeddings from shafin +author: John Snow Labs +name: chemical_bert_uncased_finetuned_cust_c2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chemical_bert_uncased_finetuned_cust_c2` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_finetuned_cust_c2_en_5.1.1_3.0_1694633093400.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_finetuned_cust_c2_en_5.1.1_3.0_1694633093400.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chemical_bert_uncased_finetuned_cust_c2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chemical_bert_uncased_finetuned_cust_c2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemical_bert_uncased_finetuned_cust_c2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/shafin/chemical-bert-uncased-finetuned-cust-c2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_en.md b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_en.md new file mode 100644 index 00000000000000..8e12c86e717ffa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chemical_bert_uncased_finetuned_cust_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chemical_bert_uncased_finetuned_cust BertEmbeddings from shafin +author: John Snow Labs +name: chemical_bert_uncased_finetuned_cust +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chemical_bert_uncased_finetuned_cust` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_finetuned_cust_en_5.1.1_3.0_1694625924079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chemical_bert_uncased_finetuned_cust_en_5.1.1_3.0_1694625924079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chemical_bert_uncased_finetuned_cust","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chemical_bert_uncased_finetuned_cust", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chemical_bert_uncased_finetuned_cust| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/shafin/chemical-bert-uncased-finetuned-cust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-childbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-childbert_en.md new file mode 100644 index 00000000000000..d1c973a23cbb25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-childbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English childbert BertEmbeddings from Aunsiels +author: John Snow Labs +name: childbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `childbert` is an English model originally trained by Aunsiels. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/childbert_en_5.1.1_3.0_1694595021953.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/childbert_en_5.1.1_3.0_1694595021953.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("childbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("childbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|childbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/Aunsiels/ChildBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-childes_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-childes_bert_en.md new file mode 100644 index 00000000000000..931e9114a64ad2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-childes_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English childes_bert BertEmbeddings from smeylan +author: John Snow Labs +name: childes_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `childes_bert` is an English model originally trained by smeylan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/childes_bert_en_5.1.1_3.0_1694574765875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/childes_bert_en_5.1.1_3.0_1694574765875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("childes_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("childes_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|childes_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/smeylan/childes-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-chupeto_en.md b/docs/_posts/ahmedlone127/2023-09-13-chupeto_en.md new file mode 100644 index 00000000000000..c3acf86a8e73d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-chupeto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English chupeto BertEmbeddings from justinian336 +author: John Snow Labs +name: chupeto +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `chupeto` is an English model originally trained by justinian336. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/chupeto_en_5.1.1_3.0_1694577833102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/chupeto_en_5.1.1_3.0_1694577833102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("chupeto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("chupeto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|chupeto| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/justinian336/chupeto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cl_arabertv0.1_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-cl_arabertv0.1_base_en.md new file mode 100644 index 00000000000000..354e265dde3e2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cl_arabertv0.1_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cl_arabertv0.1_base BertEmbeddings from qahq +author: John Snow Labs +name: cl_arabertv0.1_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cl_arabertv0.1_base` is an English model originally trained by qahq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cl_arabertv0.1_base_en_5.1.1_3.0_1694611150025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cl_arabertv0.1_base_en_5.1.1_3.0_1694611150025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cl_arabertv0.1_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cl_arabertv0.1_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cl_arabertv0.1_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.0 MB| + +## References + +https://huggingface.co/qahq/CL-AraBERTv0.1-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_bert_base_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_bert_base_128_en.md new file mode 100644 index 00000000000000..f18b61939be216 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_bert_base_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_bert_base_128 BertEmbeddings from Tsubasaz +author: John Snow Labs +name: clinical_bert_base_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_bert_base_128` is an English model originally trained by Tsubasaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_bert_base_128_en_5.1.1_3.0_1694572855798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_bert_base_128_en_5.1.1_3.0_1694572855798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_bert_base_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_bert_base_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_bert_base_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/Tsubasaz/clinical-bert-base-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_minialbert_312_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_minialbert_312_en.md new file mode 100644 index 00000000000000..33b21c2e528c93 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_minialbert_312_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_minialbert_312 BertEmbeddings from nlpie +author: John Snow Labs +name: clinical_minialbert_312 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_minialbert_312` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_minialbert_312_en_5.1.1_3.0_1694574036589.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_minialbert_312_en_5.1.1_3.0_1694574036589.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_minialbert_312","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_minialbert_312", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_minialbert_312| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| + +## References + +https://huggingface.co/nlpie/clinical-miniALBERT-312 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_128_en.md new file mode 100644 index 00000000000000..9a61fb8060b94f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_pubmed_bert_base_128 BertEmbeddings from Tsubasaz +author: John Snow Labs +name: clinical_pubmed_bert_base_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_pubmed_bert_base_128` is an English model originally trained by Tsubasaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_128_en_5.1.1_3.0_1694573024229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_128_en_5.1.1_3.0_1694573024229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_pubmed_bert_base_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_pubmed_bert_base_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_pubmed_bert_base_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/Tsubasaz/clinical-pubmed-bert-base-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_512_en.md b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_512_en.md new file mode 100644 index 00000000000000..0240fd62528afd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clinical_pubmed_bert_base_512_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_pubmed_bert_base_512 BertEmbeddings from Tsubasaz +author: John Snow Labs +name: clinical_pubmed_bert_base_512 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_pubmed_bert_base_512` is an English model originally trained by Tsubasaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_512_en_5.1.1_3.0_1694573181218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_pubmed_bert_base_512_en_5.1.1_3.0_1694573181218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinical_pubmed_bert_base_512","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinical_pubmed_bert_base_512", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_pubmed_bert_base_512| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/Tsubasaz/clinical-pubmed-bert-base-512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_base_uncased_en.md new file mode 100644 index 00000000000000..3837138d40d8e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_finetuned_bert_base_uncased BertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_finetuned_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_finetuned_bert_base_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_base_uncased_en_5.1.1_3.0_1694570611508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_base_uncased_en_5.1.1_3.0_1694570611508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clr_finetuned_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clr_finetuned_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_finetuned_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-finetuned-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_large_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_large_uncased_en.md new file mode 100644 index 00000000000000..065b6c5b7f9ed8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clr_finetuned_bert_large_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_finetuned_bert_large_uncased BertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_finetuned_bert_large_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_finetuned_bert_large_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_large_uncased_en_5.1.1_3.0_1694570948330.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_finetuned_bert_large_uncased_en_5.1.1_3.0_1694570948330.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clr_finetuned_bert_large_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clr_finetuned_bert_large_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_finetuned_bert_large_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-finetuned-bert-large-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-clr_pretrained_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-clr_pretrained_bert_base_uncased_en.md new file mode 100644 index 00000000000000..a95a1ffb937be3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-clr_pretrained_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_pretrained_bert_base_uncased BertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_pretrained_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_pretrained_bert_base_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_pretrained_bert_base_uncased_en_5.1.1_3.0_1694571112395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_pretrained_bert_base_uncased_en_5.1.1_3.0_1694571112395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clr_pretrained_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clr_pretrained_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_pretrained_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-pretrained-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cms_ext_bio_clinicalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-cms_ext_bio_clinicalbert_en.md new file mode 100644 index 00000000000000..3f8b71da9b25dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cms_ext_bio_clinicalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cms_ext_bio_clinicalbert BertEmbeddings from lowem1 +author: John Snow Labs +name: cms_ext_bio_clinicalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cms_ext_bio_clinicalbert` is an English model originally trained by lowem1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cms_ext_bio_clinicalbert_en_5.1.1_3.0_1694584923196.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cms_ext_bio_clinicalbert_en_5.1.1_3.0_1694584923196.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cms_ext_bio_clinicalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cms_ext_bio_clinicalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cms_ext_bio_clinicalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|642.3 MB| + +## References + +https://huggingface.co/lowem1/cms-ext-Bio_ClinicalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cms_rx_language_correction_en.md b/docs/_posts/ahmedlone127/2023-09-13-cms_rx_language_correction_en.md new file mode 100644 index 00000000000000..8bdd347bb709e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cms_rx_language_correction_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cms_rx_language_correction BertEmbeddings from lowem1 +author: John Snow Labs +name: cms_rx_language_correction +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cms_rx_language_correction` is an English model originally trained by lowem1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cms_rx_language_correction_en_5.1.1_3.0_1694599191432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cms_rx_language_correction_en_5.1.1_3.0_1694599191432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cms_rx_language_correction","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cms_rx_language_correction", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cms_rx_language_correction| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/lowem1/cms-rx-language-correction \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_en.md new file mode 100644 index 00000000000000..49919d736de2a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_base BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_base` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_base_en_5.1.1_3.0_1694608342393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_base_en_5.1.1_3.0_1694608342393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/OpenMatch/cocodr-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_msmarco_en.md new file mode 100644 index 00000000000000..8228637f0d980e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_base_msmarco BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_base_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_base_msmarco` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_base_msmarco_en_5.1.1_3.0_1694609582266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_base_msmarco_en_5.1.1_3.0_1694609582266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_base_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_base_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_base_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/OpenMatch/cocodr-base-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_msmarco_warmup_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_msmarco_warmup_en.md new file mode 100644 index 00000000000000..160749a93944da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_base_msmarco_warmup_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_base_msmarco_warmup BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_base_msmarco_warmup +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_base_msmarco_warmup` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_base_msmarco_warmup_en_5.1.1_3.0_1694643215149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_base_msmarco_warmup_en_5.1.1_3.0_1694643215149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_base_msmarco_warmup","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_base_msmarco_warmup", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_base_msmarco_warmup| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/OpenMatch/cocodr-base-msmarco-warmup \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_en.md new file mode 100644 index 00000000000000..1e9f45976a95c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_large BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_large` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_large_en_5.1.1_3.0_1694609184010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_large_en_5.1.1_3.0_1694609184010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/cocodr-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_en.md new file mode 100644 index 00000000000000..c52f7bd32f4645 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_large_msmarco BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_large_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_large_msmarco` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_large_msmarco_en_5.1.1_3.0_1694610587271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_large_msmarco_en_5.1.1_3.0_1694610587271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_large_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_large_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_large_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/cocodr-large-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_idro_only_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_idro_only_en.md new file mode 100644 index 00000000000000..02dc5ddfc57ef7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_idro_only_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_large_msmarco_idro_only BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_large_msmarco_idro_only +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_large_msmarco_idro_only` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_large_msmarco_idro_only_en_5.1.1_3.0_1694615878283.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_large_msmarco_idro_only_en_5.1.1_3.0_1694615878283.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_large_msmarco_idro_only","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_large_msmarco_idro_only", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_large_msmarco_idro_only| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/cocodr-large-msmarco-idro-only \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_warmup_en.md b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_warmup_en.md new file mode 100644 index 00000000000000..091ba7798f5740 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cocodr_large_msmarco_warmup_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cocodr_large_msmarco_warmup BertEmbeddings from OpenMatch +author: John Snow Labs +name: cocodr_large_msmarco_warmup +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cocodr_large_msmarco_warmup` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cocodr_large_msmarco_warmup_en_5.1.1_3.0_1694615098798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cocodr_large_msmarco_warmup_en_5.1.1_3.0_1694615098798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cocodr_large_msmarco_warmup","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cocodr_large_msmarco_warmup", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cocodr_large_msmarco_warmup| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/cocodr-large-msmarco-warmup \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-colbert_bertnsp_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-colbert_bertnsp_220_en.md new file mode 100644 index 00000000000000..46d97738416db0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-colbert_bertnsp_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English colbert_bertnsp_220 BertEmbeddings from approach0 +author: John Snow Labs +name: colbert_bertnsp_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `colbert_bertnsp_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/colbert_bertnsp_220_en_5.1.1_3.0_1694632363032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/colbert_bertnsp_220_en_5.1.1_3.0_1694632363032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("colbert_bertnsp_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("colbert_bertnsp_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|colbert_bertnsp_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/colbert-bertnsp-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-colbert_bertnsp_600_en.md b/docs/_posts/ahmedlone127/2023-09-13-colbert_bertnsp_600_en.md new file mode 100644 index 00000000000000..b5553e73bcb2ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-colbert_bertnsp_600_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English colbert_bertnsp_600 BertEmbeddings from approach0 +author: John Snow Labs +name: colbert_bertnsp_600 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `colbert_bertnsp_600` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/colbert_bertnsp_600_en_5.1.1_3.0_1694632961846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/colbert_bertnsp_600_en_5.1.1_3.0_1694632961846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("colbert_bertnsp_600","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("colbert_bertnsp_600", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|colbert_bertnsp_600| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/colbert-bertnsp-600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-colbert_cocomae_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-colbert_cocomae_220_en.md new file mode 100644 index 00000000000000..b8c5dd29fd76b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-colbert_cocomae_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English colbert_cocomae_220 BertEmbeddings from approach0 +author: John Snow Labs +name: colbert_cocomae_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `colbert_cocomae_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/colbert_cocomae_220_en_5.1.1_3.0_1694631433780.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/colbert_cocomae_220_en_5.1.1_3.0_1694631433780.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("colbert_cocomae_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("colbert_cocomae_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|colbert_cocomae_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/colbert-cocomae-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-colbert_cocomae_600_en.md b/docs/_posts/ahmedlone127/2023-09-13-colbert_cocomae_600_en.md new file mode 100644 index 00000000000000..efca6eb167b23c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-colbert_cocomae_600_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English colbert_cocomae_600 BertEmbeddings from approach0 +author: John Snow Labs +name: colbert_cocomae_600 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `colbert_cocomae_600` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/colbert_cocomae_600_en_5.1.1_3.0_1694631908560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/colbert_cocomae_600_en_5.1.1_3.0_1694631908560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("colbert_cocomae_600","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("colbert_cocomae_600", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|colbert_cocomae_600| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/colbert-cocomae-600 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-compact_biobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-compact_biobert_en.md new file mode 100644 index 00000000000000..144968fab29a3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-compact_biobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English compact_biobert BertEmbeddings from nlpie +author: John Snow Labs +name: compact_biobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `compact_biobert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/compact_biobert_en_5.1.1_3.0_1694574999951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/compact_biobert_en_5.1.1_3.0_1694574999951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("compact_biobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("compact_biobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|compact_biobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.4 MB| + +## References + +https://huggingface.co/nlpie/compact-biobert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-condenser_bert_large_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-condenser_bert_large_uncased_en.md new file mode 100644 index 00000000000000..39e5eeba0d1d56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-condenser_bert_large_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English condenser_bert_large_uncased BertEmbeddings from ffgcc +author: John Snow Labs +name: condenser_bert_large_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `condenser_bert_large_uncased` is an English model originally trained by ffgcc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/condenser_bert_large_uncased_en_5.1.1_3.0_1694592731465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/condenser_bert_large_uncased_en_5.1.1_3.0_1694592731465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("condenser_bert_large_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("condenser_bert_large_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|condenser_bert_large_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/ffgcc/condenser-bert-large-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-condenser_en.md b/docs/_posts/ahmedlone127/2023-09-13-condenser_en.md new file mode 100644 index 00000000000000..7f1dac0527eb84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-condenser_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English condenser BertEmbeddings from Luyu +author: John Snow Labs +name: condenser +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `condenser` is an English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/condenser_en_5.1.1_3.0_1694567000935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/condenser_en_5.1.1_3.0_1694567000935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("condenser","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("condenser", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|condenser| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/Luyu/condenser \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-condenser_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-condenser_large_en.md new file mode 100644 index 00000000000000..febd8c7a23d7af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-condenser_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English condenser_large BertEmbeddings from OpenMatch +author: John Snow Labs +name: condenser_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `condenser_large` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/condenser_large_en_5.1.1_3.0_1694612875253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/condenser_large_en_5.1.1_3.0_1694612875253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("condenser_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("condenser_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|condenser_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/condenser-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-continue_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-continue_mlm_en.md new file mode 100644 index 00000000000000..fc267d90feb3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-continue_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English continue_mlm BertEmbeddings from researchaccount +author: John Snow Labs +name: continue_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `continue_mlm` is an English model originally trained by researchaccount. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/continue_mlm_en_5.1.1_3.0_1694567886435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/continue_mlm_en_5.1.1_3.0_1694567886435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("continue_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("continue_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|continue_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|608.4 MB| + +## References + +https://huggingface.co/researchaccount/continue_mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-contractbr_bert_base_portuguese_en.md b/docs/_posts/ahmedlone127/2023-09-13-contractbr_bert_base_portuguese_en.md new file mode 100644 index 00000000000000..7755c4473c57b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-contractbr_bert_base_portuguese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English contractbr_bert_base_portuguese BertEmbeddings from gacosta +author: John Snow Labs +name: contractbr_bert_base_portuguese +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `contractbr_bert_base_portuguese` is an English model originally trained by gacosta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/contractbr_bert_base_portuguese_en_5.1.1_3.0_1694582459489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/contractbr_bert_base_portuguese_en_5.1.1_3.0_1694582459489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("contractbr_bert_base_portuguese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("contractbr_bert_base_portuguese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|contractbr_bert_base_portuguese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/gacosta/contractbr-bert-base-portuguese \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cord_scibert_en.md b/docs/_posts/ahmedlone127/2023-09-13-cord_scibert_en.md new file mode 100644 index 00000000000000..fb2738000a6231 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cord_scibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cord_scibert BertEmbeddings from athiban2001 +author: John Snow Labs +name: cord_scibert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cord_scibert` is an English model originally trained by athiban2001. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cord_scibert_en_5.1.1_3.0_1694617688873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cord_scibert_en_5.1.1_3.0_1694617688873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("cord_scibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cord_scibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cord_scibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.7 MB| + +## References + +https://huggingface.co/athiban2001/cord-scibert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cordbert_1000_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-cordbert_1000_v1_en.md new file mode 100644 index 00000000000000..77c0c28060c6d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cordbert_1000_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cordbert_1000_v1 BertEmbeddings from tanvir21 +author: John Snow Labs +name: cordbert_1000_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cordbert_1000_v1` is an English model originally trained by tanvir21. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cordbert_1000_v1_en_5.1.1_3.0_1694574593447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cordbert_1000_v1_en_5.1.1_3.0_1694574593447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("cordbert_1000_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cordbert_1000_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cordbert_1000_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/tanvir21/cordBERT-1000-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_large_en.md new file mode 100644 index 00000000000000..b44932a1fd53f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English corsican_condenser_large BertEmbeddings from OpenMatch +author: John Snow Labs +name: corsican_condenser_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `corsican_condenser_large` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/corsican_condenser_large_en_5.1.1_3.0_1694613630133.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/corsican_condenser_large_en_5.1.1_3.0_1694613630133.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("corsican_condenser_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("corsican_condenser_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|corsican_condenser_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/co-condenser-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_large_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_large_msmarco_en.md new file mode 100644 index 00000000000000..82cb24462acbbe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_large_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English corsican_condenser_large_msmarco BertEmbeddings from OpenMatch +author: John Snow Labs +name: corsican_condenser_large_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `corsican_condenser_large_msmarco` is an English model originally trained by OpenMatch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/corsican_condenser_large_msmarco_en_5.1.1_3.0_1694614396061.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/corsican_condenser_large_msmarco_en_5.1.1_3.0_1694614396061.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("corsican_condenser_large_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("corsican_condenser_large_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|corsican_condenser_large_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/OpenMatch/co-condenser-large-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_marco_en.md b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_marco_en.md new file mode 100644 index 00000000000000..c37cd0b2fc911c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_marco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English corsican_condenser_marco BertEmbeddings from Luyu +author: John Snow Labs +name: corsican_condenser_marco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `corsican_condenser_marco` is an English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/corsican_condenser_marco_en_5.1.1_3.0_1694566669130.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/corsican_condenser_marco_en_5.1.1_3.0_1694566669130.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("corsican_condenser_marco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("corsican_condenser_marco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|corsican_condenser_marco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/Luyu/co-condenser-marco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_wiki_en.md b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_wiki_en.md new file mode 100644 index 00000000000000..1f65c52df534b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-corsican_condenser_wiki_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English corsican_condenser_wiki BertEmbeddings from Luyu +author: John Snow Labs +name: corsican_condenser_wiki +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `corsican_condenser_wiki` is an English model originally trained by Luyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/corsican_condenser_wiki_en_5.1.1_3.0_1694566835743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/corsican_condenser_wiki_en_5.1.1_3.0_1694566835743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("corsican_condenser_wiki","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("corsican_condenser_wiki", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|corsican_condenser_wiki| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/Luyu/co-condenser-wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-costa_wiki_en.md b/docs/_posts/ahmedlone127/2023-09-13-costa_wiki_en.md new file mode 100644 index 00000000000000..a345b27c2093bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-costa_wiki_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English costa_wiki BertEmbeddings from xyma +author: John Snow Labs +name: costa_wiki +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `costa_wiki` is an English model originally trained by xyma. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/costa_wiki_en_5.1.1_3.0_1694624815803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/costa_wiki_en_5.1.1_3.0_1694624815803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("costa_wiki","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("costa_wiki", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|costa_wiki| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/xyma/COSTA-wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cotmae_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-cotmae_base_uncased_en.md new file mode 100644 index 00000000000000..ec4b67fc38f637 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cotmae_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cotmae_base_uncased BertEmbeddings from caskcsg +author: John Snow Labs +name: cotmae_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cotmae_base_uncased` is an English model originally trained by caskcsg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cotmae_base_uncased_en_5.1.1_3.0_1694617137606.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cotmae_base_uncased_en_5.1.1_3.0_1694617137606.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("cotmae_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cotmae_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cotmae_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/caskcsg/cotmae_base_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-covid_bert_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-covid_bert_base_en.md new file mode 100644 index 00000000000000..88d1d6c9e36ad2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-covid_bert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English covid_bert_base BertEmbeddings from deepset +author: John Snow Labs +name: covid_bert_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `covid_bert_base` is an English model originally trained by deepset. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/covid_bert_base_en_5.1.1_3.0_1694601364340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/covid_bert_base_en_5.1.1_3.0_1694601364340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("covid_bert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("covid_bert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|covid_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/deepset/covid_bert_base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ct_pubmedbert_re_en.md b/docs/_posts/ahmedlone127/2023-09-13-ct_pubmedbert_re_en.md new file mode 100644 index 00000000000000..c624a3e3d61279 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ct_pubmedbert_re_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ct_pubmedbert_re BertEmbeddings from zhangzeyu +author: John Snow Labs +name: ct_pubmedbert_re +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ct_pubmedbert_re` is an English model originally trained by zhangzeyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ct_pubmedbert_re_en_5.1.1_3.0_1694640469042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ct_pubmedbert_re_en_5.1.1_3.0_1694640469042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("ct_pubmedbert_re","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ct_pubmedbert_re", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ct_pubmedbert_re| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.0 MB| + +## References + +https://huggingface.co/zhangzeyu/CT-PubMedBERT-RE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cubetest_zh.md b/docs/_posts/ahmedlone127/2023-09-13-cubetest_zh.md new file mode 100644 index 00000000000000..bc289bd5bd2a15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cubetest_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese cubetest BertEmbeddings from Cube +author: John Snow Labs +name: cubetest +date: 2023-09-13 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cubetest` is a Chinese model originally trained by Cube. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cubetest_zh_5.1.1_3.0_1694565779939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cubetest_zh_5.1.1_3.0_1694565779939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("cubetest","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cubetest", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cubetest| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|259.0 MB| + +## References + +https://huggingface.co/Cube/cubetest \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-custom_legalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-custom_legalbert_en.md new file mode 100644 index 00000000000000..28074d6aa750f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-custom_legalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English custom_legalbert BertEmbeddings from casehold +author: John Snow Labs +name: custom_legalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `custom_legalbert` is an English model originally trained by casehold. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/custom_legalbert_en_5.1.1_3.0_1694597799860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/custom_legalbert_en_5.1.1_3.0_1694597799860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into the "documents" annotation column. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings reads the "documents" and "token" columns and writes "embeddings". +embeddings = BertEmbeddings.pretrained("custom_legalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Output column must be "documents": it is the input that BertEmbeddings reads below. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("custom_legalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|custom_legalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|411.6 MB| + +## References + +https://huggingface.co/casehold/custom-legalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_chunkedv1_en.md b/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_chunkedv1_en.md new file mode 100644 index 00000000000000..be8ae5fc9970d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_chunkedv1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cxr_bioclinicalbert_chunkedv1 BertEmbeddings from ICLbioengNLP +author: John Snow Labs +name: cxr_bioclinicalbert_chunkedv1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cxr_bioclinicalbert_chunkedv1` is an English model originally trained by ICLbioengNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cxr_bioclinicalbert_chunkedv1_en_5.1.1_3.0_1694616858394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cxr_bioclinicalbert_chunkedv1_en_5.1.1_3.0_1694616858394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cxr_bioclinicalbert_chunkedv1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cxr_bioclinicalbert_chunkedv1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cxr_bioclinicalbert_chunkedv1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/ICLbioengNLP/CXR_BioClinicalBERT_chunkedv1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_mlm_en.md new file mode 100644 index 00000000000000..86a2d2e4dc25c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cxr_bioclinicalbert_mlm BertEmbeddings from ICLbioengNLP +author: John Snow Labs +name: cxr_bioclinicalbert_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cxr_bioclinicalbert_mlm` is an English model originally trained by ICLbioengNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cxr_bioclinicalbert_mlm_en_5.1.1_3.0_1694628615018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cxr_bioclinicalbert_mlm_en_5.1.1_3.0_1694628615018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cxr_bioclinicalbert_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cxr_bioclinicalbert_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cxr_bioclinicalbert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.0 MB| + +## References + +https://huggingface.co/ICLbioengNLP/CXR_BioClinicalBERT_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_v1_en.md new file mode 100644 index 00000000000000..dc32f1b8ccf23b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-cxr_bioclinicalbert_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English cxr_bioclinicalbert_v1 BertEmbeddings from dorltcheng +author: John Snow Labs +name: cxr_bioclinicalbert_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cxr_bioclinicalbert_v1` is an English model originally trained by dorltcheng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/cxr_bioclinicalbert_v1_en_5.1.1_3.0_1694613574482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/cxr_bioclinicalbert_v1_en_5.1.1_3.0_1694613574482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("cxr_bioclinicalbert_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("cxr_bioclinicalbert_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|cxr_bioclinicalbert_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/dorltcheng/CXR_BioClinicalBERT_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-czert_b_base_cased_cs.md b/docs/_posts/ahmedlone127/2023-09-13-czert_b_base_cased_cs.md new file mode 100644 index 00000000000000..75cb46fa7c6a55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-czert_b_base_cased_cs.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Czech czert_b_base_cased BertEmbeddings from UWB-AIR +author: John Snow Labs +name: czert_b_base_cased +date: 2023-09-13 +tags: [bert, cs, open_source, fill_mask, onnx] +task: Embeddings +language: cs +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `czert_b_base_cased` is a Czech model originally trained by UWB-AIR. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/czert_b_base_cased_cs_5.1.1_3.0_1694574332325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/czert_b_base_cased_cs_5.1.1_3.0_1694574332325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("czert_b_base_cased","cs") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("czert_b_base_cased", "cs") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|czert_b_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|cs| +|Size:|408.3 MB| + +## References + +https://huggingface.co/UWB-AIR/Czert-B-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dabert_multi_en.md b/docs/_posts/ahmedlone127/2023-09-13-dabert_multi_en.md new file mode 100644 index 00000000000000..599fbb123c6dbe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dabert_multi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dabert_multi BertEmbeddings from christofid +author: John Snow Labs +name: dabert_multi +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dabert_multi` is an English model originally trained by christofid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dabert_multi_en_5.1.1_3.0_1694647641174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dabert_multi_en_5.1.1_3.0_1694647641174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dabert_multi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dabert_multi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dabert_multi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/christofid/dabert-multi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dajobbert_base_uncased_da.md b/docs/_posts/ahmedlone127/2023-09-13-dajobbert_base_uncased_da.md new file mode 100644 index 00000000000000..a5c41aaf14f5b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dajobbert_base_uncased_da.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Danish dajobbert_base_uncased BertEmbeddings from jjzha +author: John Snow Labs +name: dajobbert_base_uncased +date: 2023-09-13 +tags: [bert, da, open_source, fill_mask, onnx] +task: Embeddings +language: da +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dajobbert_base_uncased` is a Danish model originally trained by jjzha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dajobbert_base_uncased_da_5.1.1_3.0_1694632323819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dajobbert_base_uncased_da_5.1.1_3.0_1694632323819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dajobbert_base_uncased","da") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dajobbert_base_uncased", "da") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dajobbert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|da| +|Size:|411.3 MB| + +## References + +https://huggingface.co/jjzha/dajobbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dal_bert_finetuned_medical_v3_en.md b/docs/_posts/ahmedlone127/2023-09-13-dal_bert_finetuned_medical_v3_en.md new file mode 100644 index 00000000000000..3ac4fa97d1b892 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dal_bert_finetuned_medical_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dal_bert_finetuned_medical_v3 BertEmbeddings from IRI2070 +author: John Snow Labs +name: dal_bert_finetuned_medical_v3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dal_bert_finetuned_medical_v3` is an English model originally trained by IRI2070. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dal_bert_finetuned_medical_v3_en_5.1.1_3.0_1694601639385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dal_bert_finetuned_medical_v3_en_5.1.1_3.0_1694601639385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dal_bert_finetuned_medical_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dal_bert_finetuned_medical_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dal_bert_finetuned_medical_v3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|432.1 MB| + +## References + +https://huggingface.co/IRI2070/dal-bert-finetuned-medical-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_en.md new file mode 100644 index 00000000000000..adfd1bdd103345 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_en_5.1.1_3.0_1694572069064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_en_5.1.1_3.0_1694572069064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_life_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_life_test_en.md new file mode 100644 index 00000000000000..d17418592ff471 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_life_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_life_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_life_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_life_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_life_test_en_5.1.1_3.0_1694570882292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_life_test_en_5.1.1_3.0_1694570882292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_life_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_life_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_life_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_life_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_rec_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_rec_test_en.md new file mode 100644 index 00000000000000..99c897c087e9d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_rec_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_rec_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_rec_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_rec_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_rec_test_en_5.1.1_3.0_1694571547494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_rec_test_en_5.1.1_3.0_1694571547494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_rec_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_rec_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_rec_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_rec_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_sci_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_sci_test_en.md new file mode 100644 index 00000000000000..3010fc78c3ea32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_sci_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_sci_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_sci_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_sci_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_sci_test_en_5.1.1_3.0_1694571388033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_sci_test_en_5.1.1_3.0_1694571388033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_sci_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_sci_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_sci_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_sci_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_tech_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_tech_test_en.md new file mode 100644 index 00000000000000..71ea0695d2c733 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_tech_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_tech_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_tech_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_tech_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_tech_test_en_5.1.1_3.0_1694571198236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_tech_test_en_5.1.1_3.0_1694571198236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_tech_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_tech_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_tech_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_tech_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_write_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_write_test_en.md new file mode 100644 index 00000000000000..1d6ad632de2181 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_lotte_write_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_lotte_write_test BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_lotte_write_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_lotte_write_test` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_write_test_en_5.1.1_3.0_1694571044394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_lotte_write_test_en_5.1.1_3.0_1694571044394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_lotte_write_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_lotte_write_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_lotte_write_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-lotte_write_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_trec_covid_en.md b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_trec_covid_en.md new file mode 100644 index 00000000000000..956dfdf9c5a66f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dam_bert_base_mlm_msmarco_trec_covid_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dam_bert_base_mlm_msmarco_trec_covid BertEmbeddings from jingtao +author: John Snow Labs +name: dam_bert_base_mlm_msmarco_trec_covid +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dam_bert_base_mlm_msmarco_trec_covid` is an English model originally trained by jingtao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_trec_covid_en_5.1.1_3.0_1694571695715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dam_bert_base_mlm_msmarco_trec_covid_en_5.1.1_3.0_1694571695715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dam_bert_base_mlm_msmarco_trec_covid","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dam_bert_base_mlm_msmarco_trec_covid", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dam_bert_base_mlm_msmarco_trec_covid| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jingtao/DAM-bert_base-mlm-msmarco-trec_covid \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-danish_bert_botxo_da.md b/docs/_posts/ahmedlone127/2023-09-13-danish_bert_botxo_da.md new file mode 100644 index 00000000000000..dfaf7571f7bca9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-danish_bert_botxo_da.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Danish danish_bert_botxo BertEmbeddings from Maltehb +author: John Snow Labs +name: danish_bert_botxo +date: 2023-09-13 +tags: [bert, da, open_source, fill_mask, onnx] +task: Embeddings +language: da +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `danish_bert_botxo` is a Danish model originally trained by Maltehb. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/danish_bert_botxo_da_5.1.1_3.0_1694567180720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/danish_bert_botxo_da_5.1.1_3.0_1694567180720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("danish_bert_botxo","da") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("danish_bert_botxo", "da") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|danish_bert_botxo| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|da| +|Size:|412.3 MB| + +## References + +https://huggingface.co/Maltehb/danish-bert-botxo \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-danish_legal_bert_base_da.md b/docs/_posts/ahmedlone127/2023-09-13-danish_legal_bert_base_da.md new file mode 100644 index 00000000000000..bddeda23e86fba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-danish_legal_bert_base_da.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Danish danish_legal_bert_base BertEmbeddings from coastalcph +author: John Snow Labs +name: danish_legal_bert_base +date: 2023-09-13 +tags: [bert, da, open_source, fill_mask, onnx] +task: Embeddings +language: da +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `danish_legal_bert_base` is a Danish model originally trained by coastalcph. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/danish_legal_bert_base_da_5.1.1_3.0_1694589626739.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/danish_legal_bert_base_da_5.1.1_3.0_1694589626739.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("danish_legal_bert_base","da") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("danish_legal_bert_base", "da") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|danish_legal_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|da| +|Size:|411.6 MB| + +## References + +https://huggingface.co/coastalcph/danish-legal-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dapbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-dapbert_en.md new file mode 100644 index 00000000000000..c85685f66e7789 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dapbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dapbert BertEmbeddings from christofid +author: John Snow Labs +name: dapbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dapbert` is an English model originally trained by christofid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dapbert_en_5.1.1_3.0_1694645713628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dapbert_en_5.1.1_3.0_1694645713628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dapbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dapbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dapbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/christofid/dapbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dapscibert_en.md b/docs/_posts/ahmedlone127/2023-09-13-dapscibert_en.md new file mode 100644 index 00000000000000..f9ef2c5902ac33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dapscibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dapscibert BertEmbeddings from christofid +author: John Snow Labs +name: dapscibert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dapscibert` is an English model originally trained by christofid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dapscibert_en_5.1.1_3.0_1694646154402.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dapscibert_en_5.1.1_3.0_1694646154402.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dapscibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dapscibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dapscibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/christofid/dapscibert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dapt_bert_ko.md b/docs/_posts/ahmedlone127/2023-09-13-dapt_bert_ko.md new file mode 100644 index 00000000000000..9a282a35eb3c46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dapt_bert_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean dapt_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: dapt_bert +date: 2023-09-13 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dapt_bert` is a Korean model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dapt_bert_ko_5.1.1_3.0_1694576688190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dapt_bert_ko_5.1.1_3.0_1694576688190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dapt_bert","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dapt_bert", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dapt_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Kdogs/dapt_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-darijabert_arabizi_ar.md b/docs/_posts/ahmedlone127/2023-09-13-darijabert_arabizi_ar.md new file mode 100644 index 00000000000000..48a8755bfb50a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-darijabert_arabizi_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic darijabert_arabizi BertEmbeddings from SI2M-Lab +author: John Snow Labs +name: darijabert_arabizi +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `darijabert_arabizi` is an Arabic model originally trained by SI2M-Lab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/darijabert_arabizi_ar_5.1.1_3.0_1694564181705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/darijabert_arabizi_ar_5.1.1_3.0_1694564181705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("darijabert_arabizi","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("darijabert_arabizi", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|darijabert_arabizi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|634.9 MB| + +## References + +https://huggingface.co/SI2M-Lab/DarijaBERT-arabizi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-datafinder_scibert_dutch_queries_en.md b/docs/_posts/ahmedlone127/2023-09-13-datafinder_scibert_dutch_queries_en.md new file mode 100644 index 00000000000000..67f6b75cc25669 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-datafinder_scibert_dutch_queries_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English datafinder_scibert_dutch_queries BertEmbeddings from viswavi +author: John Snow Labs +name: datafinder_scibert_dutch_queries +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `datafinder_scibert_dutch_queries` is an English model originally trained by viswavi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/datafinder_scibert_dutch_queries_en_5.1.1_3.0_1694597768652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/datafinder_scibert_dutch_queries_en_5.1.1_3.0_1694597768652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("datafinder_scibert_dutch_queries","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("datafinder_scibert_dutch_queries", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|datafinder_scibert_dutch_queries| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/viswavi/datafinder-scibert-nl-queries \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dbbert_el.md b/docs/_posts/ahmedlone127/2023-09-13-dbbert_el.md new file mode 100644 index 00000000000000..64b4def3a1c89d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dbbert_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) dbbert BertEmbeddings from colinswaelens +author: John Snow Labs +name: dbbert +date: 2023-09-13 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbbert` is a Modern Greek (1453-) model originally trained by colinswaelens. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbbert_el_5.1.1_3.0_1694581792538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbbert_el_5.1.1_3.0_1694581792538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dbbert","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dbbert", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|408.3 MB| + +## References + +https://huggingface.co/colinswaelens/DBBErt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dbert_ko.md b/docs/_posts/ahmedlone127/2023-09-13-dbert_ko.md new file mode 100644 index 00000000000000..1fc439d9070a33 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dbert_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean dbert BertEmbeddings from baikal-nlp +author: John Snow Labs +name: dbert +date: 2023-09-13 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbert` is a Korean model originally trained by baikal-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_ko_5.1.1_3.0_1694601729093.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_ko_5.1.1_3.0_1694601729093.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dbert","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dbert", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|421.2 MB| + +## References + +https://huggingface.co/baikal-nlp/dbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dce_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-dce_bert_en.md new file mode 100644 index 00000000000000..978b1aac7c81b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dce_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dce_bert BertEmbeddings from Daniel-Saeedi +author: John Snow Labs +name: dce_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dce_bert` is an English model originally trained by Daniel-Saeedi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dce_bert_en_5.1.1_3.0_1694588634755.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dce_bert_en_5.1.1_3.0_1694588634755.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dce_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dce_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dce_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Daniel-Saeedi/DCE_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_cls_en.md b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_cls_en.md new file mode 100644 index 00000000000000..ca04ab6fe345a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_cls_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English defsent_bert_base_uncased_cls BertEmbeddings from cl-nagoya +author: John Snow Labs +name: defsent_bert_base_uncased_cls +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `defsent_bert_base_uncased_cls` is an English model originally trained by cl-nagoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/defsent_bert_base_uncased_cls_en_5.1.1_3.0_1694591623874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/defsent_bert_base_uncased_cls_en_5.1.1_3.0_1694591623874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("defsent_bert_base_uncased_cls","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("defsent_bert_base_uncased_cls", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|defsent_bert_base_uncased_cls| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/cl-nagoya/defsent-bert-base-uncased-cls \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_max_en.md b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_max_en.md new file mode 100644 index 00000000000000..5bd65bbbf6e250 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_max_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English defsent_bert_base_uncased_max BertEmbeddings from cl-nagoya +author: John Snow Labs +name: defsent_bert_base_uncased_max +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `defsent_bert_base_uncased_max` is an English model originally trained by cl-nagoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/defsent_bert_base_uncased_max_en_5.1.1_3.0_1694591807875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/defsent_bert_base_uncased_max_en_5.1.1_3.0_1694591807875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("defsent_bert_base_uncased_max","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("defsent_bert_base_uncased_max", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|defsent_bert_base_uncased_max| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/cl-nagoya/defsent-bert-base-uncased-max \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_mean_en.md b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_mean_en.md new file mode 100644 index 00000000000000..299e8f7b886645 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_base_uncased_mean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English defsent_bert_base_uncased_mean BertEmbeddings from cl-nagoya +author: John Snow Labs +name: defsent_bert_base_uncased_mean +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `defsent_bert_base_uncased_mean` is an English model originally trained by cl-nagoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/defsent_bert_base_uncased_mean_en_5.1.1_3.0_1694591979137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/defsent_bert_base_uncased_mean_en_5.1.1_3.0_1694591979137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("defsent_bert_base_uncased_mean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("defsent_bert_base_uncased_mean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|defsent_bert_base_uncased_mean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/cl-nagoya/defsent-bert-base-uncased-mean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_cls_en.md b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_cls_en.md new file mode 100644 index 00000000000000..775e11f666de9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_cls_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English defsent_bert_large_uncased_cls BertEmbeddings from cl-nagoya +author: John Snow Labs +name: defsent_bert_large_uncased_cls +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `defsent_bert_large_uncased_cls` is an English model originally trained by cl-nagoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/defsent_bert_large_uncased_cls_en_5.1.1_3.0_1694592276372.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/defsent_bert_large_uncased_cls_en_5.1.1_3.0_1694592276372.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("defsent_bert_large_uncased_cls","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("defsent_bert_large_uncased_cls", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|defsent_bert_large_uncased_cls| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/cl-nagoya/defsent-bert-large-uncased-cls \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_max_en.md b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_max_en.md new file mode 100644 index 00000000000000..e1191204040027 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_max_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English defsent_bert_large_uncased_max BertEmbeddings from cl-nagoya +author: John Snow Labs +name: defsent_bert_large_uncased_max +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `defsent_bert_large_uncased_max` is an English model originally trained by cl-nagoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/defsent_bert_large_uncased_max_en_5.1.1_3.0_1694592601643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/defsent_bert_large_uncased_max_en_5.1.1_3.0_1694592601643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("defsent_bert_large_uncased_max","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("defsent_bert_large_uncased_max", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|defsent_bert_large_uncased_max| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/cl-nagoya/defsent-bert-large-uncased-max \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_mean_en.md b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_mean_en.md new file mode 100644 index 00000000000000..1fef36d1678c19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-defsent_bert_large_uncased_mean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English defsent_bert_large_uncased_mean BertEmbeddings from cl-nagoya +author: John Snow Labs +name: defsent_bert_large_uncased_mean +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `defsent_bert_large_uncased_mean` is an English model originally trained by cl-nagoya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/defsent_bert_large_uncased_mean_en_5.1.1_3.0_1694592879544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/defsent_bert_large_uncased_mean_en_5.1.1_3.0_1694592879544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("defsent_bert_large_uncased_mean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("defsent_bert_large_uncased_mean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|defsent_bert_large_uncased_mean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/cl-nagoya/defsent-bert-large-uncased-mean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-detox_kcbert_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-detox_kcbert_base_en.md new file mode 100644 index 00000000000000..49044e9a1c704f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-detox_kcbert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English detox_kcbert_base BertEmbeddings from beomi +author: John Snow Labs +name: detox_kcbert_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `detox_kcbert_base` is an English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/detox_kcbert_base_en_5.1.1_3.0_1694586418562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/detox_kcbert_base_en_5.1.1_3.0_1694586418562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("detox_kcbert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("detox_kcbert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|detox_kcbert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/beomi/detox-kcbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dictbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-dictbert_en.md new file mode 100644 index 00000000000000..7fd09a811d9552 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dictbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dictbert BertEmbeddings from wyu1 +author: John Snow Labs +name: dictbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dictbert` is an English model originally trained by wyu1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictbert_en_5.1.1_3.0_1694574371852.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictbert_en_5.1.1_3.0_1694574371852.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dictbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dictbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wyu1/DictBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distil_biobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-distil_biobert_en.md new file mode 100644 index 00000000000000..e0a3a57abe0059 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distil_biobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distil_biobert BertEmbeddings from nlpie +author: John Snow Labs +name: distil_biobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distil_biobert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_biobert_en_5.1.1_3.0_1694574845011.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_biobert_en_5.1.1_3.0_1694574845011.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("distil_biobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("distil_biobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_biobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.4 MB| + +## References + +https://huggingface.co/nlpie/distil-biobert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distil_clinicalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-distil_clinicalbert_en.md new file mode 100644 index 00000000000000..97ae8e2fea4c56 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distil_clinicalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distil_clinicalbert BertEmbeddings from nlpie +author: John Snow Labs +name: distil_clinicalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distil_clinicalbert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_clinicalbert_en_5.1.1_3.0_1694606505752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_clinicalbert_en_5.1.1_3.0_1694606505752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("distil_clinicalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("distil_clinicalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_clinicalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.5 MB| + +## References + +https://huggingface.co/nlpie/distil-clinicalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_0.0_en.md b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_0.0_en.md new file mode 100644 index 00000000000000..b07af59d141fe7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_0.0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbertu_base_cased_0.0 BertEmbeddings from amitness +author: John Snow Labs +name: distilbertu_base_cased_0.0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbertu_base_cased_0.0` is an English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_0.0_en_5.1.1_3.0_1694584748299.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_0.0_en_5.1.1_3.0_1694584748299.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("distilbertu_base_cased_0.0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("distilbertu_base_cased_0.0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertu_base_cased_0.0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|470.4 MB| + +## References + +https://huggingface.co/amitness/distilbertu-base-cased-0.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_0.5_en.md b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_0.5_en.md new file mode 100644 index 00000000000000..e775e2a5dc48fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_0.5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbertu_base_cased_0.5 BertEmbeddings from amitness +author: John Snow Labs +name: distilbertu_base_cased_0.5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbertu_base_cased_0.5` is an English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_0.5_en_5.1.1_3.0_1694584594357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_0.5_en_5.1.1_3.0_1694584594357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("distilbertu_base_cased_0.5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("distilbertu_base_cased_0.5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertu_base_cased_0.5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|469.9 MB| + +## References + +https://huggingface.co/amitness/distilbertu-base-cased-0.5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_1.0_en.md b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_1.0_en.md new file mode 100644 index 00000000000000..50e164eb9a64dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_1.0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbertu_base_cased_1.0 BertEmbeddings from amitness +author: John Snow Labs +name: distilbertu_base_cased_1.0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbertu_base_cased_1.0` is a English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_1.0_en_5.1.1_3.0_1694585114516.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_1.0_en_5.1.1_3.0_1694585114516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("distilbertu_base_cased_1.0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("distilbertu_base_cased_1.0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertu_base_cased_1.0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|470.4 MB| + +## References + +https://huggingface.co/amitness/distilbertu-base-cased-1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_anneal_en.md b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_anneal_en.md new file mode 100644 index 00000000000000..52f56521e1552b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_anneal_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbertu_base_cased_anneal BertEmbeddings from amitness +author: John Snow Labs +name: distilbertu_base_cased_anneal +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbertu_base_cased_anneal` is a English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_anneal_en_5.1.1_3.0_1694585303981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_anneal_en_5.1.1_3.0_1694585303981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("distilbertu_base_cased_anneal","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("distilbertu_base_cased_anneal", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertu_base_cased_anneal| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|470.4 MB| + +## References + +https://huggingface.co/amitness/distilbertu-base-cased-anneal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_en.md new file mode 100644 index 00000000000000..b578da8e30f0b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-distilbertu_base_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbertu_base_cased BertEmbeddings from amitness +author: John Snow Labs +name: distilbertu_base_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbertu_base_cased` is a English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_en_5.1.1_3.0_1694580349235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbertu_base_cased_en_5.1.1_3.0_1694580349235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("distilbertu_base_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("distilbertu_base_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbertu_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/amitness/distilbertu-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dlub_2022_mlm_full_muug_en.md b/docs/_posts/ahmedlone127/2023-09-13-dlub_2022_mlm_full_muug_en.md new file mode 100644 index 00000000000000..6236a746a7f382 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dlub_2022_mlm_full_muug_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dlub_2022_mlm_full_muug BertEmbeddings from Muug +author: John Snow Labs +name: dlub_2022_mlm_full_muug +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dlub_2022_mlm_full_muug` is a English model originally trained by Muug. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_muug_en_5.1.1_3.0_1694586598576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dlub_2022_mlm_full_muug_en_5.1.1_3.0_1694586598576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dlub_2022_mlm_full_muug","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dlub_2022_mlm_full_muug", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dlub_2022_mlm_full_muug| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/Muug/dlub-2022-mlm-full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-domain_adapted_contriever_en.md b/docs/_posts/ahmedlone127/2023-09-13-domain_adapted_contriever_en.md new file mode 100644 index 00000000000000..d1e06eeaea4573 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-domain_adapted_contriever_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English domain_adapted_contriever BertEmbeddings from secilozksen +author: John Snow Labs +name: domain_adapted_contriever +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`domain_adapted_contriever` is a English model originally trained by secilozksen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/domain_adapted_contriever_en_5.1.1_3.0_1694567650087.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/domain_adapted_contriever_en_5.1.1_3.0_1694567650087.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("domain_adapted_contriever","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("domain_adapted_contriever", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|domain_adapted_contriever| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/secilozksen/domain-adapted-contriever \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_020_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_020_en.md new file mode 100644 index 00000000000000..38f4bfbc4f4b8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_bertnsp_020 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_bertnsp_020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_bertnsp_020` is a English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_020_en_5.1.1_3.0_1694621826042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_020_en_5.1.1_3.0_1694621826042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dpr_bertnsp_020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dpr_bertnsp_020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_bertnsp_020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-bertnsp-020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_120_en.md new file mode 100644 index 00000000000000..62a6ed916cfdca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_bertnsp_120 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_bertnsp_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_bertnsp_120` is a English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_120_en_5.1.1_3.0_1694622137182.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_120_en_5.1.1_3.0_1694622137182.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dpr_bertnsp_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dpr_bertnsp_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_bertnsp_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-bertnsp-120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_220_en.md new file mode 100644 index 00000000000000..cf965b9cc9cf88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_bertnsp_220 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_bertnsp_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_bertnsp_220` is a English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_220_en_5.1.1_3.0_1694622510938.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_220_en_5.1.1_3.0_1694622510938.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dpr_bertnsp_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dpr_bertnsp_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_bertnsp_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-bertnsp-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_320_en.md new file mode 100644 index 00000000000000..d4913fff9334d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_bertnsp_320 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_bertnsp_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_bertnsp_320` is a English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_320_en_5.1.1_3.0_1694622790193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_320_en_5.1.1_3.0_1694622790193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dpr_bertnsp_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dpr_bertnsp_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_bertnsp_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-bertnsp-320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_520_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_520_en.md new file mode 100644 index 00000000000000..9810c233518a59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_bertnsp_520_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_bertnsp_520 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_bertnsp_520 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_bertnsp_520` is a English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_520_en_5.1.1_3.0_1694623237282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_bertnsp_520_en_5.1.1_3.0_1694623237282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dpr_bertnsp_520","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dpr_bertnsp_520", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_bertnsp_520| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-bertnsp-520 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_passage_encoder_viquiquad_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_passage_encoder_viquiquad_base_en.md new file mode 100644 index 00000000000000..b53404b799c571 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_passage_encoder_viquiquad_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_catalan_passage_encoder_viquiquad_base BertEmbeddings from Koslav +author: John Snow Labs +name: dpr_catalan_passage_encoder_viquiquad_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_catalan_passage_encoder_viquiquad_base` is a English model originally trained by Koslav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_catalan_passage_encoder_viquiquad_base_en_5.1.1_3.0_1694580713399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_catalan_passage_encoder_viquiquad_base_en_5.1.1_3.0_1694580713399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dpr_catalan_passage_encoder_viquiquad_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("embeddings") + +val embeddings = BertEmbeddings + .pretrained("dpr_catalan_passage_encoder_viquiquad_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_catalan_passage_encoder_viquiquad_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Koslav/dpr-catalan-passage_encoder-viquiquad-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_question_encoder_viquiquad_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_question_encoder_viquiquad_base_en.md new file mode 100644 index 00000000000000..546745425754ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_catalan_question_encoder_viquiquad_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_catalan_question_encoder_viquiquad_base BertEmbeddings from Koslav +author: John Snow Labs +name: dpr_catalan_question_encoder_viquiquad_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_catalan_question_encoder_viquiquad_base` is a English model originally trained by Koslav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_catalan_question_encoder_viquiquad_base_en_5.1.1_3.0_1694579879042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_catalan_question_encoder_viquiquad_base_en_5.1.1_3.0_1694579879042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_catalan_question_encoder_viquiquad_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_catalan_question_encoder_viquiquad_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_catalan_question_encoder_viquiquad_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Koslav/dpr-catalan-question_encoder-viquiquad-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_020_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_020_en.md new file mode 100644 index 00000000000000..ca13cd1b8e3cce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocomae_020 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocomae_020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocomae_020` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocomae_020_en_5.1.1_3.0_1694619413332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocomae_020_en_5.1.1_3.0_1694619413332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocomae_020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocomae_020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocomae_020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocomae-020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_120_en.md new file mode 100644 index 00000000000000..29e7b1aa1d0bbc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocomae_120 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocomae_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocomae_120` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocomae_120_en_5.1.1_3.0_1694619848167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocomae_120_en_5.1.1_3.0_1694619848167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocomae_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocomae_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocomae_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocomae-120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_220_en.md new file mode 100644 index 00000000000000..2ab4db6d7de341 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocomae_220 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocomae_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocomae_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocomae_220_en_5.1.1_3.0_1694620314728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocomae_220_en_5.1.1_3.0_1694620314728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocomae_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocomae_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocomae_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocomae-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_320_en.md new file mode 100644 index 00000000000000..3f35572f9173df --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocomae_320 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocomae_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocomae_320` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocomae_320_en_5.1.1_3.0_1694620794927.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocomae_320_en_5.1.1_3.0_1694620794927.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocomae_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocomae_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocomae_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocomae-320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_520_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_520_en.md new file mode 100644 index 00000000000000..c0f019de3ba24e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocomae_520_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocomae_520 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocomae_520 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocomae_520` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocomae_520_en_5.1.1_3.0_1694621336068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocomae_520_en_5.1.1_3.0_1694621336068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocomae_520","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocomae_520", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocomae_520| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cocomae-520 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_020_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_020_en.md new file mode 100644 index 00000000000000..3de166049c912d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocondenser_020 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocondenser_020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocondenser_020` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_020_en_5.1.1_3.0_1694623728257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_020_en_5.1.1_3.0_1694623728257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocondenser_020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocondenser_020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocondenser_020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocondenser-020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_120_en.md new file mode 100644 index 00000000000000..b8817b95d90bcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocondenser_120 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocondenser_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocondenser_120` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_120_en_5.1.1_3.0_1694623975734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_120_en_5.1.1_3.0_1694623975734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocondenser_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocondenser_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocondenser_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocondenser-120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_220_en.md new file mode 100644 index 00000000000000..8b913747e55857 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocondenser_220 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocondenser_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocondenser_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_220_en_5.1.1_3.0_1694624315821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_220_en_5.1.1_3.0_1694624315821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocondenser_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocondenser_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocondenser_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocondenser-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_320_en.md new file mode 100644 index 00000000000000..6959bcc77bb353 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocondenser_320 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocondenser_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocondenser_320` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_320_en_5.1.1_3.0_1694624581698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_320_en_5.1.1_3.0_1694624581698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocondenser_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocondenser_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocondenser_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocondenser-320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_520_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_520_en.md new file mode 100644 index 00000000000000..7fb21f76226d8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cocondenser_520_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cocondenser_520 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cocondenser_520 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cocondenser_520` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_520_en_5.1.1_3.0_1694624864486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cocondenser_520_en_5.1.1_3.0_1694624864486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cocondenser_520","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cocondenser_520", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cocondenser_520| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/dpr-cocondenser-520 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_020_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_020_en.md new file mode 100644 index 00000000000000..84f3f8fa3309d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotbert_020 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotbert_020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotbert_020` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotbert_020_en_5.1.1_3.0_1694627301790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotbert_020_en_5.1.1_3.0_1694627301790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotbert_020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotbert_020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotbert_020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotbert-020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_120_en.md new file mode 100644 index 00000000000000..75a0082cd419cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotbert_120 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotbert_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotbert_120` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotbert_120_en_5.1.1_3.0_1694627761408.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotbert_120_en_5.1.1_3.0_1694627761408.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotbert_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotbert_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotbert_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotbert-120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_220_en.md new file mode 100644 index 00000000000000..483fa4dc5e631e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotbert_220 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotbert_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotbert_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotbert_220_en_5.1.1_3.0_1694628278286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotbert_220_en_5.1.1_3.0_1694628278286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotbert_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotbert_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotbert_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotbert-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_320_en.md new file mode 100644 index 00000000000000..1ef444321ec991 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotbert_320 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotbert_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotbert_320` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotbert_320_en_5.1.1_3.0_1694628675772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotbert_320_en_5.1.1_3.0_1694628675772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotbert_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotbert_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotbert_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotbert-320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_520_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_520_en.md new file mode 100644 index 00000000000000..17c9e4e3514ffc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotbert_520_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotbert_520 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotbert_520 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotbert_520` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotbert_520_en_5.1.1_3.0_1694629038955.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotbert_520_en_5.1.1_3.0_1694629038955.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotbert_520","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotbert_520", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotbert_520| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotbert-520 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_020_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_020_en.md new file mode 100644 index 00000000000000..85b51b65aacf58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotmae_020 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotmae_020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotmae_020` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotmae_020_en_5.1.1_3.0_1694625405708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotmae_020_en_5.1.1_3.0_1694625405708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotmae_020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotmae_020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotmae_020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotmae-020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_120_en.md new file mode 100644 index 00000000000000..d460cfe9823ac2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotmae_120 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotmae_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotmae_120` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotmae_120_en_5.1.1_3.0_1694625762302.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotmae_120_en_5.1.1_3.0_1694625762302.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotmae_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotmae_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotmae_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotmae-120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_220_en.md new file mode 100644 index 00000000000000..7ff1f9294f7e0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotmae_220 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotmae_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotmae_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotmae_220_en_5.1.1_3.0_1694626146098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotmae_220_en_5.1.1_3.0_1694626146098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotmae_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotmae_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotmae_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotmae-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_320_en.md new file mode 100644 index 00000000000000..60e1e655809b9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotmae_320 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotmae_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotmae_320` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotmae_320_en_5.1.1_3.0_1694626464675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotmae_320_en_5.1.1_3.0_1694626464675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotmae_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotmae_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotmae_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotmae-320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_520_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_520_en.md new file mode 100644 index 00000000000000..c6a19128a03475 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_cotmae_520_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_cotmae_520 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_cotmae_520 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_cotmae_520` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_cotmae_520_en_5.1.1_3.0_1694626881352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_cotmae_520_en_5.1.1_3.0_1694626881352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_cotmae_520","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_cotmae_520", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_cotmae_520| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/dpr-cotmae-520 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_passage_encoder_allqa_base_es.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_passage_encoder_allqa_base_es.md new file mode 100644 index 00000000000000..85c88b6518bbe8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_passage_encoder_allqa_base_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish dpr_spanish_passage_encoder_allqa_base BertEmbeddings from IIC +author: John Snow Labs +name: dpr_spanish_passage_encoder_allqa_base +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_spanish_passage_encoder_allqa_base` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_spanish_passage_encoder_allqa_base_es_5.1.1_3.0_1694619434308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_spanish_passage_encoder_allqa_base_es_5.1.1_3.0_1694619434308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_spanish_passage_encoder_allqa_base","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_spanish_passage_encoder_allqa_base", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_spanish_passage_encoder_allqa_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/IIC/dpr-spanish-passage_encoder-allqa-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_passage_encoder_squades_base_es.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_passage_encoder_squades_base_es.md new file mode 100644 index 00000000000000..1e089f5458263f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_passage_encoder_squades_base_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish dpr_spanish_passage_encoder_squades_base BertEmbeddings from IIC +author: John Snow Labs +name: dpr_spanish_passage_encoder_squades_base +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_spanish_passage_encoder_squades_base` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_spanish_passage_encoder_squades_base_es_5.1.1_3.0_1694612516347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_spanish_passage_encoder_squades_base_es_5.1.1_3.0_1694612516347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_spanish_passage_encoder_squades_base","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_spanish_passage_encoder_squades_base", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_spanish_passage_encoder_squades_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/IIC/dpr-spanish-passage_encoder-squades-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_question_encoder_allqa_base_es.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_question_encoder_allqa_base_es.md new file mode 100644 index 00000000000000..3bbe62430dcd6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_question_encoder_allqa_base_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish dpr_spanish_question_encoder_allqa_base BertEmbeddings from IIC +author: John Snow Labs +name: dpr_spanish_question_encoder_allqa_base +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_spanish_question_encoder_allqa_base` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_spanish_question_encoder_allqa_base_es_5.1.1_3.0_1694619850497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_spanish_question_encoder_allqa_base_es_5.1.1_3.0_1694619850497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_spanish_question_encoder_allqa_base","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_spanish_question_encoder_allqa_base", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_spanish_question_encoder_allqa_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/IIC/dpr-spanish-question_encoder-allqa-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_question_encoder_squades_base_es.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_question_encoder_squades_base_es.md new file mode 100644 index 00000000000000..51fa558ec481ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_spanish_question_encoder_squades_base_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish dpr_spanish_question_encoder_squades_base BertEmbeddings from IIC +author: John Snow Labs +name: dpr_spanish_question_encoder_squades_base +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dpr_spanish_question_encoder_squades_base` is a Castilian, Spanish model originally trained by IIC. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_spanish_question_encoder_squades_base_es_5.1.1_3.0_1694613007694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_spanish_question_encoder_squades_base_es_5.1.1_3.0_1694613007694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_spanish_question_encoder_squades_base","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_spanish_question_encoder_squades_base", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_spanish_question_encoder_squades_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.5 MB| + +## References + +https://huggingface.co/IIC/dpr-spanish-question_encoder-squades-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_020_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_020_en.md new file mode 100644 index 00000000000000..1010fac53bccba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_vanilla_bert_020 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_vanilla_bert_020 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_vanilla_bert_020` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_020_en_5.1.1_3.0_1694629366872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_020_en_5.1.1_3.0_1694629366872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_vanilla_bert_020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_vanilla_bert_020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_vanilla_bert_020| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/approach0/dpr-vanilla-bert-020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_120_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_120_en.md new file mode 100644 index 00000000000000..1dff432c9b6fd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_120_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_vanilla_bert_120 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_vanilla_bert_120 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_vanilla_bert_120` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_120_en_5.1.1_3.0_1694629722071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_120_en_5.1.1_3.0_1694629722071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_vanilla_bert_120","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_vanilla_bert_120", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_vanilla_bert_120| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/approach0/dpr-vanilla-bert-120 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_220_en.md new file mode 100644 index 00000000000000..d202ba7dad79ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_vanilla_bert_220 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_vanilla_bert_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_vanilla_bert_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_220_en_5.1.1_3.0_1694630120901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_220_en_5.1.1_3.0_1694630120901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_vanilla_bert_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_vanilla_bert_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_vanilla_bert_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/approach0/dpr-vanilla-bert-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_320_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_320_en.md new file mode 100644 index 00000000000000..d83e37f7a2dc99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_320_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_vanilla_bert_320 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_vanilla_bert_320 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_vanilla_bert_320` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_320_en_5.1.1_3.0_1694630513466.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_320_en_5.1.1_3.0_1694630513466.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_vanilla_bert_320","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_vanilla_bert_320", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_vanilla_bert_320| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/approach0/dpr-vanilla-bert-320 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_520_en.md b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_520_en.md new file mode 100644 index 00000000000000..9f1cf37b6f1039 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dpr_vanilla_bert_520_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dpr_vanilla_bert_520 BertEmbeddings from approach0 +author: John Snow Labs +name: dpr_vanilla_bert_520 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dpr_vanilla_bert_520` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_520_en_5.1.1_3.0_1694630979516.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dpr_vanilla_bert_520_en_5.1.1_3.0_1694630979516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dpr_vanilla_bert_520","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dpr_vanilla_bert_520", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dpr_vanilla_bert_520| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/approach0/dpr-vanilla-bert-520 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_context_encoder_en.md b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_context_encoder_en.md new file mode 100644 index 00000000000000..1b1ccc964427bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_context_encoder_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dragon_plus_context_encoder BertEmbeddings from facebook +author: John Snow Labs +name: dragon_plus_context_encoder +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dragon_plus_context_encoder` is an English model originally trained by facebook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dragon_plus_context_encoder_en_5.1.1_3.0_1694576995913.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dragon_plus_context_encoder_en_5.1.1_3.0_1694576995913.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dragon_plus_context_encoder","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dragon_plus_context_encoder", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dragon_plus_context_encoder| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/facebook/dragon-plus-context-encoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_query_encoder_en.md b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_query_encoder_en.md new file mode 100644 index 00000000000000..fb4aea1535cc65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dragon_plus_query_encoder_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dragon_plus_query_encoder BertEmbeddings from facebook +author: John Snow Labs +name: dragon_plus_query_encoder +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dragon_plus_query_encoder` is an English model originally trained by facebook. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dragon_plus_query_encoder_en_5.1.1_3.0_1694576871066.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dragon_plus_query_encoder_en_5.1.1_3.0_1694576871066.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dragon_plus_query_encoder","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dragon_plus_query_encoder", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dragon_plus_query_encoder| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/facebook/dragon-plus-query-encoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-drug_combinations_lm_pubmedbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-drug_combinations_lm_pubmedbert_en.md new file mode 100644 index 00000000000000..726036d9ebe635 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-drug_combinations_lm_pubmedbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English drug_combinations_lm_pubmedbert BertEmbeddings from allenai +author: John Snow Labs +name: drug_combinations_lm_pubmedbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `drug_combinations_lm_pubmedbert` is an English model originally trained by allenai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/drug_combinations_lm_pubmedbert_en_5.1.1_3.0_1694595258369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/drug_combinations_lm_pubmedbert_en_5.1.1_3.0_1694595258369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("drug_combinations_lm_pubmedbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("drug_combinations_lm_pubmedbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|drug_combinations_lm_pubmedbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/allenai/drug_combinations_lm_pubmedbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-duck_and_cover_genre_encoder_en.md b/docs/_posts/ahmedlone127/2023-09-13-duck_and_cover_genre_encoder_en.md new file mode 100644 index 00000000000000..7047da12b2b3a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-duck_and_cover_genre_encoder_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English duck_and_cover_genre_encoder BertEmbeddings from mnne +author: John Snow Labs +name: duck_and_cover_genre_encoder +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `duck_and_cover_genre_encoder` is an English model originally trained by mnne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/duck_and_cover_genre_encoder_en_5.1.1_3.0_1694625838242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/duck_and_cover_genre_encoder_en_5.1.1_3.0_1694625838242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("duck_and_cover_genre_encoder","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("duck_and_cover_genre_encoder", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|duck_and_cover_genre_encoder| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|41.8 MB| + +## References + +https://huggingface.co/mnne/duck-and-cover-genre-encoder \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_model_aripo99_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_aripo99_en.md new file mode 100644 index 00000000000000..520d13d7a60b50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_aripo99_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_aripo99 BertEmbeddings from aripo99 +author: John Snow Labs +name: dummy_model_aripo99 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_aripo99` is an English model originally trained by aripo99. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_aripo99_en_5.1.1_3.0_1694581314642.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_aripo99_en_5.1.1_3.0_1694581314642.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_aripo99","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_aripo99", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_aripo99| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/aripo99/dummy_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_model_jammm1412_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_jammm1412_en.md new file mode 100644 index 00000000000000..05043ba2285adc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_jammm1412_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_jammm1412 BertEmbeddings from jammm1412 +author: John Snow Labs +name: dummy_model_jammm1412 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_jammm1412` is an English model originally trained by jammm1412. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_jammm1412_en_5.1.1_3.0_1694590400887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_jammm1412_en_5.1.1_3.0_1694590400887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_jammm1412","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_jammm1412", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_jammm1412| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/jammm1412/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_model_yangwooko_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_yangwooko_en.md new file mode 100644 index 00000000000000..7ba86749517693 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_model_yangwooko_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_yangwooko BertEmbeddings from yangwooko +author: John Snow Labs +name: dummy_model_yangwooko +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_yangwooko` is an English model originally trained by yangwooko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_yangwooko_en_5.1.1_3.0_1694592058468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_yangwooko_en_5.1.1_3.0_1694592058468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_yangwooko","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_yangwooko", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_yangwooko| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/yangwooko/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_1_en.md new file mode 100644 index 00000000000000..6c27cd0264995b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_prac_model_1 BertEmbeddings from fayez94 +author: John Snow Labs +name: dummy_prac_model_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_prac_model_1` is an English model originally trained by fayez94. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_prac_model_1_en_5.1.1_3.0_1694564301534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_prac_model_1_en_5.1.1_3.0_1694564301534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dummy_prac_model_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_prac_model_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_prac_model_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/fayez94/dummy_prac_model_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_en.md new file mode 100644 index 00000000000000..85213af1c02129 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dummy_prac_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_prac_model BertEmbeddings from fayez94 +author: John Snow Labs +name: dummy_prac_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_prac_model` is an English model originally trained by fayez94. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_prac_model_en_5.1.1_3.0_1694564149966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_prac_model_en_5.1.1_3.0_1694564149966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dummy_prac_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_prac_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_prac_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/fayez94/dummy_prac_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-dziribert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-dziribert_ar.md new file mode 100644 index 00000000000000..8b9d37ffceb42d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-dziribert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic dziribert BertEmbeddings from alger-ia +author: John Snow Labs +name: dziribert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dziribert` is an Arabic model originally trained by alger-ia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dziribert_ar_5.1.1_3.0_1694578688485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dziribert_ar_5.1.1_3.0_1694578688485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("dziribert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dziribert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dziribert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|462.5 MB| + +## References + +https://huggingface.co/alger-ia/dziribert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-e4a_covid_bert_base_romanian_cased_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-e4a_covid_bert_base_romanian_cased_v1_en.md new file mode 100644 index 00000000000000..e993f7f2b6522b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-e4a_covid_bert_base_romanian_cased_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English e4a_covid_bert_base_romanian_cased_v1 BertEmbeddings from racai +author: John Snow Labs +name: e4a_covid_bert_base_romanian_cased_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e4a_covid_bert_base_romanian_cased_v1` is an English model originally trained by racai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e4a_covid_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580152465.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e4a_covid_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580152465.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("e4a_covid_bert_base_romanian_cased_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("e4a_covid_bert_base_romanian_cased_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e4a_covid_bert_base_romanian_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/racai/e4a-covid-bert-base-romanian-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-e4a_permits_bert_base_romanian_cased_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-e4a_permits_bert_base_romanian_cased_v1_en.md new file mode 100644 index 00000000000000..3560d137a409ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-e4a_permits_bert_base_romanian_cased_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English e4a_permits_bert_base_romanian_cased_v1 BertEmbeddings from racai +author: John Snow Labs +name: e4a_permits_bert_base_romanian_cased_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e4a_permits_bert_base_romanian_cased_v1` is an English model originally trained by racai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e4a_permits_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580307122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e4a_permits_bert_base_romanian_cased_v1_en_5.1.1_3.0_1694580307122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("e4a_permits_bert_base_romanian_cased_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("e4a_permits_bert_base_romanian_cased_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e4a_permits_bert_base_romanian_cased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.0 MB| + +## References + +https://huggingface.co/racai/e4a-permits-bert-base-romanian-cased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ecomm_sbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-ecomm_sbert_en.md new file mode 100644 index 00000000000000..b8cb1125cb7fad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ecomm_sbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ecomm_sbert BertEmbeddings from Maunish +author: John Snow Labs +name: ecomm_sbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ecomm_sbert` is an English model originally trained by Maunish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ecomm_sbert_en_5.1.1_3.0_1694567607524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ecomm_sbert_en_5.1.1_3.0_1694567607524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ecomm_sbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ecomm_sbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ecomm_sbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|84.6 MB| + +## References + +https://huggingface.co/Maunish/ecomm-sbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-econobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-econobert_en.md new file mode 100644 index 00000000000000..200aebd06695e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-econobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English econobert BertEmbeddings from samchain +author: John Snow Labs +name: econobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `econobert` is an English model originally trained by samchain. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/econobert_en_5.1.1_3.0_1694622554131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/econobert_en_5.1.1_3.0_1694622554131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("econobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("econobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|econobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/samchain/EconoBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-energybert_en.md b/docs/_posts/ahmedlone127/2023-09-13-energybert_en.md new file mode 100644 index 00000000000000..2c3804620bf30c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-energybert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English energybert BertEmbeddings from UNSW-MasterAI +author: John Snow Labs +name: energybert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `energybert` is an English model originally trained by UNSW-MasterAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/energybert_en_5.1.1_3.0_1694577639489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/energybert_en_5.1.1_3.0_1694577639489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("energybert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("energybert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|energybert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/UNSW-MasterAI/EnergyBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-eng_en.md b/docs/_posts/ahmedlone127/2023-09-13-eng_en.md new file mode 100644 index 00000000000000..49ae10b89a5e1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-eng_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English eng BertEmbeddings from willemjan +author: John Snow Labs +name: eng +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `eng` is an English model originally trained by willemjan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/eng_en_5.1.1_3.0_1694584220308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/eng_en_5.1.1_3.0_1694584220308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("eng","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("eng", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|eng| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/eng \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ern3_en.md b/docs/_posts/ahmedlone127/2023-09-13-ern3_en.md new file mode 100644 index 00000000000000..d3a2d264656ffe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ern3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ern3 BertEmbeddings from linyi +author: John Snow Labs +name: ern3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ern3` is an English model originally trained by linyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ern3_en_5.1.1_3.0_1694582155435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ern3_en_5.1.1_3.0_1694582155435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("ern3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ern3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ern3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|440.8 MB| + +## References + +https://huggingface.co/linyi/ern3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_alllang_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_alllang_bert_base_uncased_en.md new file mode 100644 index 00000000000000..dc0421d1284e58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_alllang_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English esci_mlm_alllang_bert_base_uncased BertEmbeddings from spacemanidol +author: John Snow Labs +name: esci_mlm_alllang_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `esci_mlm_alllang_bert_base_uncased` is an English model originally trained by spacemanidol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esci_mlm_alllang_bert_base_uncased_en_5.1.1_3.0_1694576764282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esci_mlm_alllang_bert_base_uncased_en_5.1.1_3.0_1694576764282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("esci_mlm_alllang_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("esci_mlm_alllang_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esci_mlm_alllang_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/spacemanidol/esci-mlm-alllang-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_us_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_us_bert_base_uncased_en.md new file mode 100644 index 00000000000000..fb350cc0a49cc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-esci_mlm_us_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English esci_mlm_us_bert_base_uncased BertEmbeddings from spacemanidol +author: John Snow Labs +name: esci_mlm_us_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `esci_mlm_us_bert_base_uncased` is an English model originally trained by spacemanidol. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/esci_mlm_us_bert_base_uncased_en_5.1.1_3.0_1694576915321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/esci_mlm_us_bert_base_uncased_en_5.1.1_3.0_1694576915321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("esci_mlm_us_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("esci_mlm_us_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|esci_mlm_us_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.5 MB| + +## References + +https://huggingface.co/spacemanidol/esci-mlm-us-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-estbert_512_et.md b/docs/_posts/ahmedlone127/2023-09-13-estbert_512_et.md new file mode 100644 index 00000000000000..58c6c881cb2ace --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-estbert_512_et.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Estonian estbert_512 BertEmbeddings from tartuNLP +author: John Snow Labs +name: estbert_512 +date: 2023-09-13 +tags: [bert, et, open_source, fill_mask, onnx] +task: Embeddings +language: et +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `estbert_512` is an Estonian model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/estbert_512_et_5.1.1_3.0_1694576625464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/estbert_512_et_5.1.1_3.0_1694576625464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("estbert_512","et") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("estbert_512", "et") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|estbert_512| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|et| +|Size:|463.4 MB| + +## References + +https://huggingface.co/tartuNLP/EstBERT_512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-estbert_et.md b/docs/_posts/ahmedlone127/2023-09-13-estbert_et.md new file mode 100644 index 00000000000000..170d8aafd0938f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-estbert_et.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Estonian estbert BertEmbeddings from tartuNLP +author: John Snow Labs +name: estbert +date: 2023-09-13 +tags: [bert, et, open_source, fill_mask, onnx] +task: Embeddings +language: et +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `estbert` is an Estonian model originally trained by tartuNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/estbert_et_5.1.1_3.0_1694576444603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/estbert_et_5.1.1_3.0_1694576444603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("estbert","et") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("estbert", "et") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|estbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|et| +|Size:|463.4 MB| + +## References + +https://huggingface.co/tartuNLP/EstBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-evalconvqa_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-evalconvqa_bert_en.md new file mode 100644 index 00000000000000..57fc7a4aea9d94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-evalconvqa_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English evalconvqa_bert BertEmbeddings from princeton-nlp +author: John Snow Labs +name: evalconvqa_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `evalconvqa_bert` is an English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/evalconvqa_bert_en_5.1.1_3.0_1694565341212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/evalconvqa_bert_en_5.1.1_3.0_1694565341212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("evalconvqa_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("evalconvqa_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|evalconvqa_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/princeton-nlp/EvalConvQA_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-fast_food_pure_relation_v0_en.md b/docs/_posts/ahmedlone127/2023-09-13-fast_food_pure_relation_v0_en.md new file mode 100644 index 00000000000000..82b1df2016aac1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-fast_food_pure_relation_v0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fast_food_pure_relation_v0 BertEmbeddings from yogeshchandrasekharuni +author: John Snow Labs +name: fast_food_pure_relation_v0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fast_food_pure_relation_v0` is an English model originally trained by yogeshchandrasekharuni. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fast_food_pure_relation_v0_en_5.1.1_3.0_1694618849500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fast_food_pure_relation_v0_en_5.1.1_3.0_1694618849500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fast_food_pure_relation_v0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fast_food_pure_relation_v0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fast_food_pure_relation_v0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/yogeshchandrasekharuni/fast-food-pure-relation-v0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-fbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-fbert_en.md new file mode 100644 index 00000000000000..f8b56cc3353814 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-fbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fbert BertEmbeddings from diptanu +author: John Snow Labs +name: fbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fbert` is an English model originally trained by diptanu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fbert_en_5.1.1_3.0_1694624971451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fbert_en_5.1.1_3.0_1694624971451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/diptanu/fBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-fernet_c5_cs.md b/docs/_posts/ahmedlone127/2023-09-13-fernet_c5_cs.md new file mode 100644 index 00000000000000..f7e3d06fb0d1de --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-fernet_c5_cs.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Czech fernet_c5 BertEmbeddings from fav-kky +author: John Snow Labs +name: fernet_c5 +date: 2023-09-13 +tags: [bert, cs, open_source, fill_mask, onnx] +task: Embeddings +language: cs +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fernet_c5` is a Czech model originally trained by fav-kky. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fernet_c5_cs_5.1.1_3.0_1694636619880.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fernet_c5_cs_5.1.1_3.0_1694636619880.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fernet_c5","cs") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fernet_c5", "cs") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fernet_c5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|cs| +|Size:|609.5 MB| + +## References + +https://huggingface.co/fav-kky/FERNET-C5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-fernet_cc_slovak_sk.md b/docs/_posts/ahmedlone127/2023-09-13-fernet_cc_slovak_sk.md new file mode 100644 index 00000000000000..399e209c6e5596 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-fernet_cc_slovak_sk.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Slovak fernet_cc_slovak BertEmbeddings from fav-kky +author: John Snow Labs +name: fernet_cc_slovak +date: 2023-09-13 +tags: [bert, sk, open_source, fill_mask, onnx] +task: Embeddings +language: sk +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fernet_cc_slovak` is a Slovak model originally trained by fav-kky. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fernet_cc_slovak_sk_5.1.1_3.0_1694637045822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fernet_cc_slovak_sk_5.1.1_3.0_1694637045822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fernet_cc_slovak","sk") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fernet_cc_slovak", "sk") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fernet_cc_slovak| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sk| +|Size:|609.4 MB| + +## References + +https://huggingface.co/fav-kky/FERNET-CC_sk \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-film20000bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-film20000bert_base_uncased_en.md new file mode 100644 index 00000000000000..bd4bc9f9a5acfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-film20000bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film20000bert_base_uncased BertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film20000bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film20000bert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film20000bert_base_uncased_en_5.1.1_3.0_1694587815260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film20000bert_base_uncased_en_5.1.1_3.0_1694587815260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("film20000bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("film20000bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film20000bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film20000bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-film95000bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-film95000bert_base_uncased_en.md new file mode 100644 index 00000000000000..dbc7a7ad03953c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-film95000bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film95000bert_base_uncased BertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film95000bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film95000bert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film95000bert_base_uncased_en_5.1.1_3.0_1694594888745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film95000bert_base_uncased_en_5.1.1_3.0_1694594888745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("film95000bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("film95000bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film95000bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film95000bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-film98991bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-film98991bert_base_uncased_en.md new file mode 100644 index 00000000000000..6a2666aa9d5ed5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-film98991bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film98991bert_base_uncased BertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film98991bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film98991bert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film98991bert_base_uncased_en_5.1.1_3.0_1694592232167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film98991bert_base_uncased_en_5.1.1_3.0_1694592232167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("film98991bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("film98991bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film98991bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film98991bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-financialbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-financialbert_en.md new file mode 100644 index 00000000000000..edd59bfd4bb5af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-financialbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English financialbert BertEmbeddings from ahmedrachid +author: John Snow Labs +name: financialbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `financialbert` is an English model originally trained by ahmedrachid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/financialbert_en_5.1.1_3.0_1694577402809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/financialbert_en_5.1.1_3.0_1694577402809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("financialbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("financialbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|financialbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/ahmedrachid/FinancialBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-finbert_lm_finetuned_news_en.md b/docs/_posts/ahmedlone127/2023-09-13-finbert_lm_finetuned_news_en.md new file mode 100644 index 00000000000000..1b746cf42f86bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-finbert_lm_finetuned_news_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finbert_lm_finetuned_news BertEmbeddings from HoseinPanahi +author: John Snow Labs +name: finbert_lm_finetuned_news +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert_lm_finetuned_news` is an English model originally trained by HoseinPanahi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_lm_finetuned_news_en_5.1.1_3.0_1694585481904.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_lm_finetuned_news_en_5.1.1_3.0_1694585481904.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finbert_lm_finetuned_news","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finbert_lm_finetuned_news", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_lm_finetuned_news| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/HoseinPanahi/finbert-lm-finetuned-news \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-finbert_pretrain_tianzhou_en.md b/docs/_posts/ahmedlone127/2023-09-13-finbert_pretrain_tianzhou_en.md new file mode 100644 index 00000000000000..9b5a0a539c6ad5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-finbert_pretrain_tianzhou_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finbert_pretrain_tianzhou BertEmbeddings from Tianzhou +author: John Snow Labs +name: finbert_pretrain_tianzhou +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert_pretrain_tianzhou` is an English model originally trained by Tianzhou. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_pretrain_tianzhou_en_5.1.1_3.0_1694591031769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_pretrain_tianzhou_en_5.1.1_3.0_1694591031769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finbert_pretrain_tianzhou","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finbert_pretrain_tianzhou", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_pretrain_tianzhou| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/Tianzhou/finbert-pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-finbert_pretrain_yiyanghkust_en.md b/docs/_posts/ahmedlone127/2023-09-13-finbert_pretrain_yiyanghkust_en.md new file mode 100644 index 00000000000000..7e19262bbe0b91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-finbert_pretrain_yiyanghkust_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finbert_pretrain_yiyanghkust BertEmbeddings from philschmid +author: John Snow Labs +name: finbert_pretrain_yiyanghkust +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert_pretrain_yiyanghkust` is an English model originally trained by philschmid. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_pretrain_yiyanghkust_en_5.1.1_3.0_1694563205165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_pretrain_yiyanghkust_en_5.1.1_3.0_1694563205165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finbert_pretrain_yiyanghkust","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finbert_pretrain_yiyanghkust", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_pretrain_yiyanghkust| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/philschmid/finbert-pretrain-yiyanghkust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-finbert_wechsel_korean_en.md b/docs/_posts/ahmedlone127/2023-09-13-finbert_wechsel_korean_en.md new file mode 100644 index 00000000000000..ce009c06b284aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-finbert_wechsel_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finbert_wechsel_korean BertEmbeddings from LeverageX +author: John Snow Labs +name: finbert_wechsel_korean +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `finbert_wechsel_korean` is an English model originally trained by LeverageX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finbert_wechsel_korean_en_5.1.1_3.0_1694566000716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finbert_wechsel_korean_en_5.1.1_3.0_1694566000716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("finbert_wechsel_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("finbert_wechsel_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finbert_wechsel_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.5 MB| + +## References + +https://huggingface.co/LeverageX/finbert-wechsel-korean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-fine_tune_bert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-13-fine_tune_bert_mlm_en.md new file mode 100644 index 00000000000000..30f540b20bfd4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-fine_tune_bert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fine_tune_bert_mlm BertEmbeddings from mjavadmt +author: John Snow Labs +name: fine_tune_bert_mlm +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tune_bert_mlm` is an English model originally trained by mjavadmt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tune_bert_mlm_en_5.1.1_3.0_1694622010218.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tune_bert_mlm_en_5.1.1_3.0_1694622010218.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fine_tune_bert_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fine_tune_bert_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tune_bert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.4 MB| + +## References + +https://huggingface.co/mjavadmt/fine-tune-BERT-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-first_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-first_model_en.md new file mode 100644 index 00000000000000..b22012612473b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-first_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English first_model BertEmbeddings from songqian +author: John Snow Labs +name: first_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `first_model` is an English model originally trained by songqian. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_model_en_5.1.1_3.0_1694575037544.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_model_en_5.1.1_3.0_1694575037544.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("first_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("first_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/songqian/first_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-first_try_rubert_200_16_16_10ep_en.md b/docs/_posts/ahmedlone127/2023-09-13-first_try_rubert_200_16_16_10ep_en.md new file mode 100644 index 00000000000000..6799aa0211c0ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-first_try_rubert_200_16_16_10ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English first_try_rubert_200_16_16_10ep BertEmbeddings from sergiyvl +author: John Snow Labs +name: first_try_rubert_200_16_16_10ep +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `first_try_rubert_200_16_16_10ep` is an English model originally trained by sergiyvl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_try_rubert_200_16_16_10ep_en_5.1.1_3.0_1694573249695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_try_rubert_200_16_16_10ep_en_5.1.1_3.0_1694573249695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("first_try_rubert_200_16_16_10ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("first_try_rubert_200_16_16_10ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_try_rubert_200_16_16_10ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/sergiyvl/first_try_RuBERT_200_16_16_10ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-first_try_rubert_200_16_16_25ep_en.md b/docs/_posts/ahmedlone127/2023-09-13-first_try_rubert_200_16_16_25ep_en.md new file mode 100644 index 00000000000000..115a223d9974ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-first_try_rubert_200_16_16_25ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English first_try_rubert_200_16_16_25ep BertEmbeddings from sergiyvl +author: John Snow Labs +name: first_try_rubert_200_16_16_25ep +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `first_try_rubert_200_16_16_25ep` is an English model originally trained by sergiyvl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_try_rubert_200_16_16_25ep_en_5.1.1_3.0_1694573484331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_try_rubert_200_16_16_25ep_en_5.1.1_3.0_1694573484331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("first_try_rubert_200_16_16_25ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("first_try_rubert_200_16_16_25ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_try_rubert_200_16_16_25ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/sergiyvl/first_try_RuBERT_200_16_16_25ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_2_domain_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_2_domain_10_en.md new file mode 100644 index 00000000000000..2f17d61ccd58d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_2_domain_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_2_domain_10 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_2_domain_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_2_domain_10` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_2_domain_10_en_5.1.1_3.0_1694592009482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_2_domain_10_en_5.1.1_3.0_1694592009482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_2_domain_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_2_domain_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_2_domain_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_2_domain_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_2_domain_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_2_domain_20_en.md new file mode 100644 index 00000000000000..edf83ab4338b87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_2_domain_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_2_domain_20 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_2_domain_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_2_domain_20` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_2_domain_20_en_5.1.1_3.0_1694592413273.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_2_domain_20_en_5.1.1_3.0_1694592413273.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_2_domain_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_2_domain_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_2_domain_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_2_domain_20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_10_en.md new file mode 100644 index 00000000000000..4c3169396a98f1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_domain_10 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_domain_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_domain_10` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_domain_10_en_5.1.1_3.0_1694588212891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_domain_10_en_5.1.1_3.0_1694588212891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_domain_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_domain_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_domain_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_domain_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_20_en.md new file mode 100644 index 00000000000000..c7d3db75c2fdcf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_domain_20 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_domain_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_domain_20` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_domain_20_en_5.1.1_3.0_1694588464159.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_domain_20_en_5.1.1_3.0_1694588464159.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_domain_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_domain_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_domain_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_domain_20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_30_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_30_en.md new file mode 100644 index 00000000000000..7a1b3e9de9b684 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_domain_30_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_domain_30 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_domain_30 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_domain_30` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_domain_30_en_5.1.1_3.0_1694589245637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_domain_30_en_5.1.1_3.0_1694589245637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_domain_30","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_domain_30", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_domain_30| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_domain_30 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_intent_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_intent_10_en.md new file mode 100644 index 00000000000000..5456cf600ae19f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_intent_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_intent_10 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_intent_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_intent_10` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_intent_10_en_5.1.1_3.0_1694589450357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_intent_10_en_5.1.1_3.0_1694589450357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_intent_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_intent_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_intent_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_intent_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-further_train_original_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-further_train_original_10_en.md new file mode 100644 index 00000000000000..fbad8893e69a78 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-further_train_original_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English further_train_original_10 BertEmbeddings from onlydj96 +author: John Snow Labs +name: further_train_original_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `further_train_original_10` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/further_train_original_10_en_5.1.1_3.0_1694588052671.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/further_train_original_10_en_5.1.1_3.0_1694588052671.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("further_train_original_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("further_train_original_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|further_train_original_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/further_train_original_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-futuretod_base_v1.0_en.md b/docs/_posts/ahmedlone127/2023-09-13-futuretod_base_v1.0_en.md new file mode 100644 index 00000000000000..5ca5e0339d0fa5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-futuretod_base_v1.0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English futuretod_base_v1.0 BertEmbeddings from AndrewZeng +author: John Snow Labs +name: futuretod_base_v1.0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `futuretod_base_v1.0` is an English model originally trained by AndrewZeng. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/futuretod_base_v1.0_en_5.1.1_3.0_1694594942260.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/futuretod_base_v1.0_en_5.1.1_3.0_1694594942260.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("futuretod_base_v1.0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("futuretod_base_v1.0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|futuretod_base_v1.0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.6 MB| + +## References + +https://huggingface.co/AndrewZeng/futuretod-base-v1.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-german_bert_base_german_cased_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-german_bert_base_german_cased_finetuned_en.md new file mode 100644 index 00000000000000..8f88cccaac2903 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-german_bert_base_german_cased_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English german_bert_base_german_cased_finetuned BertEmbeddings from rodrigotuna +author: John Snow Labs +name: german_bert_base_german_cased_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_bert_base_german_cased_finetuned` is an English model originally trained by rodrigotuna. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_bert_base_german_cased_finetuned_en_5.1.1_3.0_1694565721535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_bert_base_german_cased_finetuned_en_5.1.1_3.0_1694565721535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("german_bert_base_german_cased_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("german_bert_base_german_cased_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_bert_base_german_cased_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/rodrigotuna/de-bert-base-german-cased-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-german_financial_statements_bert_de.md b/docs/_posts/ahmedlone127/2023-09-13-german_financial_statements_bert_de.md new file mode 100644 index 00000000000000..b46aa402b3c54c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-german_financial_statements_bert_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German german_financial_statements_bert BertEmbeddings from fabianrausch +author: John Snow Labs +name: german_financial_statements_bert +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_financial_statements_bert` is a German model originally trained by fabianrausch. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_financial_statements_bert_de_5.1.1_3.0_1694604897426.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_financial_statements_bert_de_5.1.1_3.0_1694604897426.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("german_financial_statements_bert","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("german_financial_statements_bert", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_financial_statements_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/fabianrausch/german-financial-statements-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-german_medbert_de.md b/docs/_posts/ahmedlone127/2023-09-13-german_medbert_de.md new file mode 100644 index 00000000000000..2613b3e2831b95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-german_medbert_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German german_medbert BertEmbeddings from smanjil +author: John Snow Labs +name: german_medbert +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_medbert` is a German model originally trained by smanjil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_medbert_de_5.1.1_3.0_1694574616438.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_medbert_de_5.1.1_3.0_1694574616438.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("german_medbert","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("german_medbert", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_medbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/smanjil/German-MedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-german_medbert_issues_128_de.md b/docs/_posts/ahmedlone127/2023-09-13-german_medbert_issues_128_de.md new file mode 100644 index 00000000000000..94617b60f14ed3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-german_medbert_issues_128_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German german_medbert_issues_128 BertEmbeddings from ogimgio +author: John Snow Labs +name: german_medbert_issues_128 +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_medbert_issues_128` is a German model originally trained by ogimgio. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_medbert_issues_128_de_5.1.1_3.0_1694612124790.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_medbert_issues_128_de_5.1.1_3.0_1694612124790.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("german_medbert_issues_128","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("german_medbert_issues_128", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_medbert_issues_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/ogimgio/German-MedBERT-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-greek_media_bert_base_uncased_el.md b/docs/_posts/ahmedlone127/2023-09-13-greek_media_bert_base_uncased_el.md new file mode 100644 index 00000000000000..d47f66ca464251 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-greek_media_bert_base_uncased_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) greek_media_bert_base_uncased BertEmbeddings from dimitriz +author: John Snow Labs +name: greek_media_bert_base_uncased +date: 2023-09-13 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `greek_media_bert_base_uncased` is a Modern Greek (1453-) model originally trained by dimitriz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greek_media_bert_base_uncased_el_5.1.1_3.0_1694581631448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greek_media_bert_base_uncased_el_5.1.1_3.0_1694581631448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("greek_media_bert_base_uncased","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("greek_media_bert_base_uncased", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greek_media_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|420.8 MB| + +## References + +https://huggingface.co/dimitriz/greek-media-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-greeksocialbert_base_greek_social_media_v2_el.md b/docs/_posts/ahmedlone127/2023-09-13-greeksocialbert_base_greek_social_media_v2_el.md new file mode 100644 index 00000000000000..2eecd22367db6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-greeksocialbert_base_greek_social_media_v2_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) greeksocialbert_base_greek_social_media_v2 BertEmbeddings from pchatz +author: John Snow Labs +name: greeksocialbert_base_greek_social_media_v2 +date: 2023-09-13 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `greeksocialbert_base_greek_social_media_v2` is a Modern Greek (1453-) model originally trained by pchatz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greeksocialbert_base_greek_social_media_v2_el_5.1.1_3.0_1694613573415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greeksocialbert_base_greek_social_media_v2_el_5.1.1_3.0_1694613573415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("greeksocialbert_base_greek_social_media_v2","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("greeksocialbert_base_greek_social_media_v2", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greeksocialbert_base_greek_social_media_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|421.3 MB| + +## References + +https://huggingface.co/pchatz/greeksocialbert-base-greek-social-media-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-greeksocialbert_base_greek_uncased_v1_el.md b/docs/_posts/ahmedlone127/2023-09-13-greeksocialbert_base_greek_uncased_v1_el.md new file mode 100644 index 00000000000000..c20e71174595ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-greeksocialbert_base_greek_uncased_v1_el.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Modern Greek (1453-) greeksocialbert_base_greek_uncased_v1 BertEmbeddings from gealexandri +author: John Snow Labs +name: greeksocialbert_base_greek_uncased_v1 +date: 2023-09-13 +tags: [bert, el, open_source, fill_mask, onnx] +task: Embeddings +language: el +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `greeksocialbert_base_greek_uncased_v1` is a Modern Greek (1453-) model originally trained by gealexandri. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/greeksocialbert_base_greek_uncased_v1_el_5.1.1_3.0_1694648049169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/greeksocialbert_base_greek_uncased_v1_el_5.1.1_3.0_1694648049169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("greeksocialbert_base_greek_uncased_v1","el") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("greeksocialbert_base_greek_uncased_v1", "el") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|greeksocialbert_base_greek_uncased_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|el| +|Size:|421.3 MB| + +## References + +https://huggingface.co/gealexandri/greeksocialbert-base-greek-uncased-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-gujarati_bert_gu.md b/docs/_posts/ahmedlone127/2023-09-13-gujarati_bert_gu.md new file mode 100644 index 00000000000000..afebfe448e345c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-gujarati_bert_gu.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Gujarati gujarati_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: gujarati_bert +date: 2023-09-13 +tags: [bert, gu, open_source, fill_mask, onnx] +task: Embeddings +language: gu +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gujarati_bert` is a Gujarati model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujarati_bert_gu_5.1.1_3.0_1694642031980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujarati_bert_gu_5.1.1_3.0_1694642031980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gujarati_bert","gu") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gujarati_bert", "gu") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujarati_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gu| +|Size:|890.4 MB| + +## References + +https://huggingface.co/l3cube-pune/gujarati-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-gujibert_fan_en.md b/docs/_posts/ahmedlone127/2023-09-13-gujibert_fan_en.md new file mode 100644 index 00000000000000..974ed93ec27adc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-gujibert_fan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gujibert_fan BertEmbeddings from hsc748NLP +author: John Snow Labs +name: gujibert_fan +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gujibert_fan` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujibert_fan_en_5.1.1_3.0_1694564628936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujibert_fan_en_5.1.1_3.0_1694564628936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gujibert_fan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gujibert_fan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujibert_fan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.2 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiBERT_fan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-gujibert_jian_fan_en.md b/docs/_posts/ahmedlone127/2023-09-13-gujibert_jian_fan_en.md new file mode 100644 index 00000000000000..5e085a7a7f5f49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-gujibert_jian_fan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gujibert_jian_fan BertEmbeddings from hsc748NLP +author: John Snow Labs +name: gujibert_jian_fan +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gujibert_jian_fan` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujibert_jian_fan_en_5.1.1_3.0_1694602793200.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujibert_jian_fan_en_5.1.1_3.0_1694602793200.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("gujibert_jian_fan", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("gujibert_jian_fan", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujibert_jian_fan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.3 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiBERT_jian_fan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-gujiroberta_fan_en.md b/docs/_posts/ahmedlone127/2023-09-13-gujiroberta_fan_en.md new file mode 100644 index 00000000000000..ad0d5548c74829 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-gujiroberta_fan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gujiroberta_fan BertEmbeddings from hsc748NLP +author: John Snow Labs +name: gujiroberta_fan +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gujiroberta_fan` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujiroberta_fan_en_5.1.1_3.0_1694602043407.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujiroberta_fan_en_5.1.1_3.0_1694602043407.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("gujiroberta_fan", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("gujiroberta_fan", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujiroberta_fan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.2 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiRoBERTa_fan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-gujiroberta_jian_fan_en.md b/docs/_posts/ahmedlone127/2023-09-13-gujiroberta_jian_fan_en.md new file mode 100644 index 00000000000000..54611211e4320c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-gujiroberta_jian_fan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gujiroberta_jian_fan BertEmbeddings from hsc748NLP +author: John Snow Labs +name: gujiroberta_jian_fan +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gujiroberta_jian_fan` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujiroberta_jian_fan_en_5.1.1_3.0_1694603135270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujiroberta_jian_fan_en_5.1.1_3.0_1694603135270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("gujiroberta_jian_fan", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("gujiroberta_jian_fan", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujiroberta_jian_fan| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.2 MB| + +## References + +https://huggingface.co/hsc748NLP/GujiRoBERTa_jian_fan \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hebert_en.md b/docs/_posts/ahmedlone127/2023-09-13-hebert_en.md new file mode 100644 index 00000000000000..c005513098f274 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hebert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hebert BertEmbeddings from avichr +author: John Snow Labs +name: hebert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hebert` is an English model originally trained by avichr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hebert_en_5.1.1_3.0_1694584681748.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hebert_en_5.1.1_3.0_1694584681748.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hebert", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hebert", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hebert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/avichr/heBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-helloworld_en.md b/docs/_posts/ahmedlone127/2023-09-13-helloworld_en.md new file mode 100644 index 00000000000000..6b1f69586a01de --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-helloworld_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English helloworld BertEmbeddings from EternalPursuit +author: John Snow Labs +name: helloworld +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `helloworld` is an English model originally trained by EternalPursuit. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/helloworld_en_5.1.1_3.0_1694579975322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/helloworld_en_5.1.1_3.0_1694579975322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("helloworld", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("helloworld", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|helloworld| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EternalPursuit/HelloWorld \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_embed_en.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_embed_en.md new file mode 100644 index 00000000000000..fd28eae5bf1df2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_embed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hindi_bert_embed BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_bert_embed +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_bert_embed` is an English model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_embed_en_5.1.1_3.0_1694591488555.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_embed_en_5.1.1_3.0_1694591488555.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_bert_embed", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_bert_embed", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert_embed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|890.8 MB| + +## References + +https://huggingface.co/l3cube-pune/hi-bert-embed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_en.md new file mode 100644 index 00000000000000..562516ed5fe44a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hindi_bert BertEmbeddings from sukritin +author: John Snow Labs +name: hindi_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_bert` is an English model originally trained by sukritin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_en_5.1.1_3.0_1694575763223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_en_5.1.1_3.0_1694575763223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_bert", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_bert", "en")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|609.2 MB| + +## References + +https://huggingface.co/sukritin/hindi-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_scratch_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_scratch_hi.md new file mode 100644 index 00000000000000..a6f3495e42142e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_scratch_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_bert_scratch +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_bert_scratch` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_scratch_hi_5.1.1_3.0_1694579566926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_scratch_hi_5.1.1_3.0_1694579566926.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_bert_scratch", "hi") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_bert_scratch", "hi")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|470.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v1_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v1_hi.md new file mode 100644 index 00000000000000..3389947f94522e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v1_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_bert_v1 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_bert_v1 +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_bert_v1` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_v1_hi_5.1.1_3.0_1694575771399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_v1_hi_5.1.1_3.0_1694575771399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_bert_v1", "hi") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_bert_v1", "hi")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|663.8 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-bert-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v2_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v2_hi.md new file mode 100644 index 00000000000000..5f8abc7871dc57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_bert_v2_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_bert_v2 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_bert_v2 +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_bert_v2` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_bert_v2_hi_5.1.1_3.0_1694576005164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_bert_v2_hi_5.1.1_3.0_1694576005164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_bert_v2", "hi") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_bert_v2", "hi")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_bert_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-bert-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_least_haitian_1m_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_least_haitian_1m_hi.md new file mode 100644 index 00000000000000..da1c809c5ea558 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_least_haitian_1m_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_least_haitian_1m BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_least_haitian_1m +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_least_haitian_1m` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_least_haitian_1m_hi_5.1.1_3.0_1694584509062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_least_haitian_1m_hi_5.1.1_3.0_1694584509062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_least_haitian_1m", "hi") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_least_haitian_1m", "hi")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_least_haitian_1m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hi-least-ht-1m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_hi.md new file mode 100644 index 00000000000000..9e3e31dd3ed22f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_marathi_dev_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_marathi_dev_bert +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_marathi_dev_bert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_hi_5.1.1_3.0_1694576361222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_hi_5.1.1_3.0_1694576361222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_marathi_dev_bert", "hi") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_marathi_dev_bert", "hi")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_marathi_dev_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-marathi-dev-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_scratch_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_scratch_hi.md new file mode 100644 index 00000000000000..05db5ed8b72c67 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_marathi_dev_bert_scratch_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_marathi_dev_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_marathi_dev_bert_scratch +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hindi_marathi_dev_bert_scratch` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_scratch_hi_5.1.1_3.0_1694579727628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_marathi_dev_bert_scratch_hi_5.1.1_3.0_1694579727628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# Assumes the Spark NLP classes and Pipeline are already imported and that `data` is a DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # provides the "token" input of the embeddings stage
+
+embeddings = BertEmbeddings.pretrained("hindi_marathi_dev_bert_scratch", "hi") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// Assumes the Spark NLP classes and Pipeline are already imported and that data is a DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents") // consumed as "documents" by the tokenizer and embeddings stages
+
+val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("hindi_marathi_dev_bert_scratch", "hi")
+  .setInputCols(Array("documents", "token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_marathi_dev_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|608.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-marathi-dev-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_random_twt_1m_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_random_twt_1m_hi.md new file mode 100644 index 00000000000000..867ab58c004c9e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_random_twt_1m_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_random_twt_1m BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_random_twt_1m +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_random_twt_1m` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_random_twt_1m_hi_5.1.1_3.0_1694584742371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_random_twt_1m_hi_5.1.1_3.0_1694584742371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hindi_random_twt_1m","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_random_twt_1m", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_random_twt_1m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hi-random-twt-1m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hateful_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hateful_hi.md new file mode 100644 index 00000000000000..da69bfdddb0cb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hateful_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_tweets_bert_hateful BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_tweets_bert_hateful +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_tweets_bert_hateful` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hateful_hi_5.1.1_3.0_1694584250203.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hateful_hi_5.1.1_3.0_1694584250203.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hindi_tweets_bert_hateful","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_tweets_bert_hateful", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_tweets_bert_hateful| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert-hateful \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hi.md new file mode 100644 index 00000000000000..e059fc94d31bb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_tweets_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_tweets_bert +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_tweets_bert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hi_5.1.1_3.0_1694581273510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_hi_5.1.1_3.0_1694581273510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hindi_tweets_bert","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_tweets_bert", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_tweets_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_scratch_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_scratch_hi.md new file mode 100644 index 00000000000000..e4bae79be16f65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_scratch_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_tweets_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_tweets_bert_scratch +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_tweets_bert_scratch` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_scratch_hi_5.1.1_3.0_1694632323915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_scratch_hi_5.1.1_3.0_1694632323915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hindi_tweets_bert_scratch","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_tweets_bert_scratch", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_tweets_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|470.5 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_v2_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_v2_hi.md new file mode 100644 index 00000000000000..ad861f3e42188c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hindi_tweets_bert_v2_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hindi_tweets_bert_v2 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hindi_tweets_bert_v2 +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hindi_tweets_bert_v2` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_v2_hi_5.1.1_3.0_1694585904644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hindi_tweets_bert_v2_hi_5.1.1_3.0_1694585904644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hindi_tweets_bert_v2","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hindi_tweets_bert_v2", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hindi_tweets_bert_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/hindi-tweets-bert-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hing_bert_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hing_bert_hi.md new file mode 100644 index 00000000000000..a3be6df300e822 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hing_bert_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hing_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hing_bert +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hing_bert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hing_bert_hi_5.1.1_3.0_1694603455445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hing_bert_hi_5.1.1_3.0_1694603455445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hing_bert","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hing_bert", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hing_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|407.2 MB| + +## References + +https://huggingface.co/l3cube-pune/hing-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_hi.md new file mode 100644 index 00000000000000..7134c5d87651b9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hing_mbert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hing_mbert +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hing_mbert` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hing_mbert_hi_5.1.1_3.0_1694603989005.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hing_mbert_hi_5.1.1_3.0_1694603989005.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hing_mbert","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hing_mbert", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hing_mbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|664.9 MB| + +## References + +https://huggingface.co/l3cube-pune/hing-mbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_mixed_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_mixed_hi.md new file mode 100644 index 00000000000000..d326b13d1ee1a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_mixed_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hing_mbert_mixed BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hing_mbert_mixed +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hing_mbert_mixed` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hing_mbert_mixed_hi_5.1.1_3.0_1694618447086.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hing_mbert_mixed_hi_5.1.1_3.0_1694618447086.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hing_mbert_mixed","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hing_mbert_mixed", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hing_mbert_mixed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|664.9 MB| + +## References + +https://huggingface.co/l3cube-pune/hing-mbert-mixed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_mixed_v2_hi.md b/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_mixed_v2_hi.md new file mode 100644 index 00000000000000..9eff007ee96b4b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hing_mbert_mixed_v2_hi.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hindi hing_mbert_mixed_v2 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: hing_mbert_mixed_v2 +date: 2023-09-13 +tags: [bert, hi, open_source, fill_mask, onnx] +task: Embeddings +language: hi +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`hing_mbert_mixed_v2` is a Hindi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hing_mbert_mixed_v2_hi_5.1.1_3.0_1694591335498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hing_mbert_mixed_v2_hi_5.1.1_3.0_1694591335498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hing_mbert_mixed_v2","hi") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hing_mbert_mixed_v2", "hi") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hing_mbert_mixed_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|hi| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/hing-mbert-mixed-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hinglish_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-hinglish_finetuned_en.md new file mode 100644 index 00000000000000..d1358666fe5932 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hinglish_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_finetuned BertEmbeddings from ketan-rmcf +author: John Snow Labs +name: hinglish_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_finetuned` is an English model originally trained by ketan-rmcf. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_finetuned_en_5.1.1_3.0_1694636325882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_finetuned_en_5.1.1_3.0_1694636325882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hinglish_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hinglish_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/ketan-rmcf/hinglish-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-hsebert_italian_cased_it.md b/docs/_posts/ahmedlone127/2023-09-13-hsebert_italian_cased_it.md new file mode 100644 index 00000000000000..a550c16626afee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-hsebert_italian_cased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian hsebert_italian_cased BertEmbeddings from bullmount +author: John Snow Labs +name: hsebert_italian_cased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hsebert_italian_cased` is an Italian model originally trained by bullmount. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hsebert_italian_cased_it_5.1.1_3.0_1694589993209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hsebert_italian_cased_it_5.1.1_3.0_1694589993209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("hsebert_italian_cased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("hsebert_italian_cased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hsebert_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.9 MB| + +## References + +https://huggingface.co/bullmount/hseBert-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-improvedabg_10_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_10_epochs_en.md new file mode 100644 index 00000000000000..820db58cbc7265 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_10_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English improvedabg_10_epochs BertEmbeddings from Embible +author: John Snow Labs +name: improvedabg_10_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `improvedabg_10_epochs` is an English model originally trained by Embible. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improvedabg_10_epochs_en_5.1.1_3.0_1694563262843.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improvedabg_10_epochs_en_5.1.1_3.0_1694563262843.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("improvedabg_10_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("improvedabg_10_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improvedabg_10_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.6 MB| + +## References + +https://huggingface.co/Embible/improvedABG-10-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-improvedabg_20_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_20_epochs_en.md new file mode 100644 index 00000000000000..0b353761ff19b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_20_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English improvedabg_20_epochs BertEmbeddings from Embible +author: John Snow Labs +name: improvedabg_20_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `improvedabg_20_epochs` is an English model originally trained by Embible. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improvedabg_20_epochs_en_5.1.1_3.0_1694563461257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improvedabg_20_epochs_en_5.1.1_3.0_1694563461257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("improvedabg_20_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("improvedabg_20_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improvedabg_20_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.6 MB| + +## References + +https://huggingface.co/Embible/improvedABG-20-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-improvedabg_50_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_50_epochs_en.md new file mode 100644 index 00000000000000..7198fd3847eb15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-improvedabg_50_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English improvedabg_50_epochs BertEmbeddings from Embible +author: John Snow Labs +name: improvedabg_50_epochs +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `improvedabg_50_epochs` is an English model originally trained by Embible. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/improvedabg_50_epochs_en_5.1.1_3.0_1694563658734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/improvedabg_50_epochs_en_5.1.1_3.0_1694563658734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("improvedabg_50_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("improvedabg_50_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|improvedabg_50_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|689.4 MB| + +## References + +https://huggingface.co/Embible/improvedABG-50-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-incaselawbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-incaselawbert_en.md new file mode 100644 index 00000000000000..40ad49508ef8dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-incaselawbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English incaselawbert BertEmbeddings from law-ai +author: John Snow Labs +name: incaselawbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `incaselawbert` is an English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incaselawbert_en_5.1.1_3.0_1694582926461.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incaselawbert_en_5.1.1_3.0_1694582926461.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("incaselawbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("incaselawbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incaselawbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/law-ai/InCaseLawBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_1000k_english_xx.md b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_1000k_english_xx.md new file mode 100644 index 00000000000000..2c7bf1886e701b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_1000k_english_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual incel_bert_base_multilingual_cased_1000k_english BertEmbeddings from pgajo +author: John Snow Labs +name: incel_bert_base_multilingual_cased_1000k_english +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `incel_bert_base_multilingual_cased_1000k_english` is a Multilingual model originally trained by pgajo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incel_bert_base_multilingual_cased_1000k_english_xx_5.1.1_3.0_1694613006554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incel_bert_base_multilingual_cased_1000k_english_xx_5.1.1_3.0_1694613006554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("incel_bert_base_multilingual_cased_1000k_english","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("incel_bert_base_multilingual_cased_1000k_english", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incel_bert_base_multilingual_cased_1000k_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.1 MB| + +## References + +https://huggingface.co/pgajo/incel-bert-base-multilingual-cased-1000k_english \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_1000k_multi_xx.md b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_1000k_multi_xx.md new file mode 100644 index 00000000000000..ce868db95ed37e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_1000k_multi_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual incel_bert_base_multilingual_cased_1000k_multi BertEmbeddings from pgajo +author: John Snow Labs +name: incel_bert_base_multilingual_cased_1000k_multi +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `incel_bert_base_multilingual_cased_1000k_multi` is a Multilingual model originally trained by pgajo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incel_bert_base_multilingual_cased_1000k_multi_xx_5.1.1_3.0_1694613670535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incel_bert_base_multilingual_cased_1000k_multi_xx_5.1.1_3.0_1694613670535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("incel_bert_base_multilingual_cased_1000k_multi","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("incel_bert_base_multilingual_cased_1000k_multi", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incel_bert_base_multilingual_cased_1000k_multi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.1 MB| + +## References + +https://huggingface.co/pgajo/incel-bert-base-multilingual-cased-1000k_multi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_627k_italian_xx.md b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_627k_italian_xx.md new file mode 100644 index 00000000000000..485adf1ed601bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_base_multilingual_cased_627k_italian_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual incel_bert_base_multilingual_cased_627k_italian BertEmbeddings from pgajo +author: John Snow Labs +name: incel_bert_base_multilingual_cased_627k_italian +date: 2023-09-13 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `incel_bert_base_multilingual_cased_627k_italian` is a Multilingual model originally trained by pgajo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incel_bert_base_multilingual_cased_627k_italian_xx_5.1.1_3.0_1694612397380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incel_bert_base_multilingual_cased_627k_italian_xx_5.1.1_3.0_1694612397380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("incel_bert_base_multilingual_cased_627k_italian","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("incel_bert_base_multilingual_cased_627k_italian", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incel_bert_base_multilingual_cased_627k_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/pgajo/incel-bert-base-multilingual-cased-627k_italian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian_en.md b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian_en.md new file mode 100644 index 00000000000000..aa8032572192c6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian BertEmbeddings from pgajo +author: John Snow Labs +name: incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian` is an English model originally trained by pgajo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian_en_5.1.1_3.0_1694621049338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian_en_5.1.1_3.0_1694621049338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incel_bert_uncased_l_12_h_768_a_12_italian_alb3rt0_627k_italian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|688.7 MB| + +## References + +https://huggingface.co/pgajo/incel-bert_uncased_L-12_H-768_A-12_italian_alb3rt0-627k_italian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indo1_en.md b/docs/_posts/ahmedlone127/2023-09-13-indo1_en.md new file mode 100644 index 00000000000000..a00868e5b1e601 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indo1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English indo1 BertEmbeddings from willemjan +author: John Snow Labs +name: indo1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indo1` is an English model originally trained by willemjan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indo1_en_5.1.1_3.0_1694584468221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indo1_en_5.1.1_3.0_1694584468221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indo1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indo1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indo1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/indo1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indo2_en.md b/docs/_posts/ahmedlone127/2023-09-13-indo2_en.md new file mode 100644 index 00000000000000..7456ede4fd2e5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indo2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English indo2 BertEmbeddings from willemjan +author: John Snow Labs +name: indo2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indo2` is an English model originally trained by willemjan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indo2_en_5.1.1_3.0_1694584681956.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indo2_en_5.1.1_3.0_1694584681956.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indo2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indo2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indo2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/indo2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indo_legalbert_id.md b/docs/_posts/ahmedlone127/2023-09-13-indo_legalbert_id.md new file mode 100644 index 00000000000000..3b082f0dec52ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indo_legalbert_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian indo_legalbert BertEmbeddings from archi-ai +author: John Snow Labs +name: indo_legalbert +date: 2023-09-13 +tags: [bert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indo_legalbert` is an Indonesian model originally trained by archi-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indo_legalbert_id_5.1.1_3.0_1694623791636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indo_legalbert_id_5.1.1_3.0_1694623791636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indo_legalbert","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indo_legalbert", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indo_legalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|1.3 GB| + +## References + +https://huggingface.co/archi-ai/Indo-LegalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_10k_en.md b/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_10k_en.md new file mode 100644 index 00000000000000..0d647abad05f0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_10k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English indobert_base_p2_finetuned_mer_10k BertEmbeddings from stevenwh +author: John Snow Labs +name: indobert_base_p2_finetuned_mer_10k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indobert_base_p2_finetuned_mer_10k` is an English model originally trained by stevenwh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_base_p2_finetuned_mer_10k_en_5.1.1_3.0_1694636325821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_base_p2_finetuned_mer_10k_en_5.1.1_3.0_1694636325821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indobert_base_p2_finetuned_mer_10k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indobert_base_p2_finetuned_mer_10k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_base_p2_finetuned_mer_10k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/stevenwh/indobert-base-p2-finetuned-mer-10k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_80k_en.md b/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_80k_en.md new file mode 100644 index 00000000000000..bdbc7e1959afe3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_80k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English indobert_base_p2_finetuned_mer_80k BertEmbeddings from stevenwh +author: John Snow Labs +name: indobert_base_p2_finetuned_mer_80k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indobert_base_p2_finetuned_mer_80k` is an English model originally trained by stevenwh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_base_p2_finetuned_mer_80k_en_5.1.1_3.0_1694636807078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_base_p2_finetuned_mer_80k_en_5.1.1_3.0_1694636807078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indobert_base_p2_finetuned_mer_80k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indobert_base_p2_finetuned_mer_80k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_base_p2_finetuned_mer_80k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/stevenwh/indobert-base-p2-finetuned-mer-80k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_en.md b/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_en.md new file mode 100644 index 00000000000000..0f14ef376455f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indobert_base_p2_finetuned_mer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English indobert_base_p2_finetuned_mer BertEmbeddings from stevenwh +author: John Snow Labs +name: indobert_base_p2_finetuned_mer +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indobert_base_p2_finetuned_mer` is an English model originally trained by stevenwh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indobert_base_p2_finetuned_mer_en_5.1.1_3.0_1694629744633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indobert_base_p2_finetuned_mer_en_5.1.1_3.0_1694629744633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indobert_base_p2_finetuned_mer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indobert_base_p2_finetuned_mer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indobert_base_p2_finetuned_mer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/stevenwh/indobert-base-p2-finetuned-mer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-indojave_codemixed_bert_base_id.md b/docs/_posts/ahmedlone127/2023-09-13-indojave_codemixed_bert_base_id.md new file mode 100644 index 00000000000000..485c423bba9a53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-indojave_codemixed_bert_base_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian indojave_codemixed_bert_base BertEmbeddings from fathan +author: John Snow Labs +name: indojave_codemixed_bert_base +date: 2023-09-13 +tags: [bert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indojave_codemixed_bert_base` is an Indonesian model originally trained by fathan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indojave_codemixed_bert_base_id_5.1.1_3.0_1694574763045.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indojave_codemixed_bert_base_id_5.1.1_3.0_1694574763045.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("indojave_codemixed_bert_base","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("indojave_codemixed_bert_base", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indojave_codemixed_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|464.6 MB| + +## References + +https://huggingface.co/fathan/indojave-codemixed-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_cbp_lkg_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_cbp_lkg_finetuned_en.md new file mode 100644 index 00000000000000..12dec567dbd231 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_cbp_lkg_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inlegalbert_cbp_lkg_finetuned BertEmbeddings from kinshuk-h +author: John Snow Labs +name: inlegalbert_cbp_lkg_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inlegalbert_cbp_lkg_finetuned` is an English model originally trained by kinshuk-h. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inlegalbert_cbp_lkg_finetuned_en_5.1.1_3.0_1694606435709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inlegalbert_cbp_lkg_finetuned_en_5.1.1_3.0_1694606435709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("inlegalbert_cbp_lkg_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("inlegalbert_cbp_lkg_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inlegalbert_cbp_lkg_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/kinshuk-h/InLegalBERT-cbp-lkg-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_en.md new file mode 100644 index 00000000000000..e589b3dc1280a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-inlegalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inlegalbert BertEmbeddings from law-ai +author: John Snow Labs +name: inlegalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inlegalbert` is an English model originally trained by law-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inlegalbert_en_5.1.1_3.0_1694582770345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inlegalbert_en_5.1.1_3.0_1694582770345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("inlegalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("inlegalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inlegalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/law-ai/InLegalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-intent_further_pretrain_en.md b/docs/_posts/ahmedlone127/2023-09-13-intent_further_pretrain_en.md new file mode 100644 index 00000000000000..7961383d850226 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-intent_further_pretrain_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English intent_further_pretrain BertEmbeddings from onlydj96 +author: John Snow Labs +name: intent_further_pretrain +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `intent_further_pretrain` is an English model originally trained by onlydj96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/intent_further_pretrain_en_5.1.1_3.0_1694587646285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/intent_further_pretrain_en_5.1.1_3.0_1694587646285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("intent_further_pretrain","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("intent_further_pretrain", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|intent_further_pretrain| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.2 MB| + +## References + +https://huggingface.co/onlydj96/intent_further_pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-itcast_nlp_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-itcast_nlp_base_en.md new file mode 100644 index 00000000000000..4338d8b6d021ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-itcast_nlp_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English itcast_nlp_base BertEmbeddings from suojianhua +author: John Snow Labs +name: itcast_nlp_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `itcast_nlp_base` is an English model originally trained by suojianhua. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/itcast_nlp_base_en_5.1.1_3.0_1694575926111.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/itcast_nlp_base_en_5.1.1_3.0_1694575926111.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("itcast_nlp_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("itcast_nlp_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|itcast_nlp_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/suojianhua/itcast-nlp-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_imdb_jv.md b/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_imdb_jv.md new file mode 100644 index 00000000000000..498e4e60dc789f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_imdb_jv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Javanese javanese_bert_small_imdb BertEmbeddings from w11wo +author: John Snow Labs +name: javanese_bert_small_imdb +date: 2023-09-13 +tags: [bert, jv, open_source, fill_mask, onnx] +task: Embeddings +language: jv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `javanese_bert_small_imdb` is a Javanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/javanese_bert_small_imdb_jv_5.1.1_3.0_1694582477865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/javanese_bert_small_imdb_jv_5.1.1_3.0_1694582477865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("javanese_bert_small_imdb","jv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("javanese_bert_small_imdb", "jv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|javanese_bert_small_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|jv| +|Size:|407.3 MB| + +## References + +https://huggingface.co/w11wo/javanese-bert-small-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_jv.md b/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_jv.md new file mode 100644 index 00000000000000..0463665dbd6178 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-javanese_bert_small_jv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Javanese javanese_bert_small BertEmbeddings from w11wo +author: John Snow Labs +name: javanese_bert_small +date: 2023-09-13 +tags: [bert, jv, open_source, fill_mask, onnx] +task: Embeddings +language: jv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `javanese_bert_small` is a Javanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/javanese_bert_small_jv_5.1.1_3.0_1694582650100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/javanese_bert_small_jv_5.1.1_3.0_1694582650100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("javanese_bert_small","jv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("javanese_bert_small", "jv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|javanese_bert_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|jv| +|Size:|407.3 MB| + +## References + +https://huggingface.co/w11wo/javanese-bert-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-jobbert_base_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-jobbert_base_cased_en.md new file mode 100644 index 00000000000000..d189235ed2ed98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-jobbert_base_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English jobbert_base_cased BertEmbeddings from jjzha +author: John Snow Labs +name: jobbert_base_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jobbert_base_cased` is an English model originally trained by jjzha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jobbert_base_cased_en_5.1.1_3.0_1694631841420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jobbert_base_cased_en_5.1.1_3.0_1694631841420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("jobbert_base_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("jobbert_base_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jobbert_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.2 MB| + +## References + +https://huggingface.co/jjzha/jobbert-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kannada_bert_kn.md b/docs/_posts/ahmedlone127/2023-09-13-kannada_bert_kn.md new file mode 100644 index 00000000000000..d2953cc379d955 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kannada_bert_kn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Kannada kannada_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: kannada_bert +date: 2023-09-13 +tags: [bert, kn, open_source, fill_mask, onnx] +task: Embeddings +language: kn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kannada_bert` is a Kannada model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kannada_bert_kn_5.1.1_3.0_1694638806956.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kannada_bert_kn_5.1.1_3.0_1694638806956.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kannada_bert","kn") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kannada_bert", "kn") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kannada_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|kn| +|Size:|890.5 MB| + +## References + +https://huggingface.co/l3cube-pune/kannada-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kazakhbertmulti_kk.md b/docs/_posts/ahmedlone127/2023-09-13-kazakhbertmulti_kk.md new file mode 100644 index 00000000000000..d19355b01557e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kazakhbertmulti_kk.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Kazakh kazakhbertmulti BertEmbeddings from amandyk +author: John Snow Labs +name: kazakhbertmulti +date: 2023-09-13 +tags: [bert, kk, open_source, fill_mask, onnx] +task: Embeddings +language: kk +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kazakhbertmulti` is a Kazakh model originally trained by amandyk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kazakhbertmulti_kk_5.1.1_3.0_1694567001040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kazakhbertmulti_kk_5.1.1_3.0_1694567001040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kazakhbertmulti","kk") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kazakhbertmulti", "kk") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kazakhbertmulti| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|kk| +|Size:|609.9 MB| + +## References + +https://huggingface.co/amandyk/KazakhBERTmulti \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kc_900_en.md b/docs/_posts/ahmedlone127/2023-09-13-kc_900_en.md new file mode 100644 index 00000000000000..105e0c7462f8a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kc_900_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kc_900 BertEmbeddings from erica +author: John Snow Labs +name: kc_900 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kc_900` is an English model originally trained by erica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kc_900_en_5.1.1_3.0_1694633305275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kc_900_en_5.1.1_3.0_1694633305275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kc_900","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kc_900", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kc_900| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/erica/kc_900 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbase400_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbase400_en.md new file mode 100644 index 00000000000000..1e7a615c41a4b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbase400_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbase400 BertEmbeddings from erica +author: John Snow Labs +name: kcbase400 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbase400` is an English model originally trained by erica. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbase400_en_5.1.1_3.0_1694633739043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbase400_en_5.1.1_3.0_1694633739043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbase400","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbase400", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbase400| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/erica/kcbase400 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_dev_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_dev_en.md new file mode 100644 index 00000000000000..e8fc285327efa2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_dev_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_base_dev BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_base_dev +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_base_dev` is an English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_base_dev_en_5.1.1_3.0_1694586571113.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_base_dev_en_5.1.1_3.0_1694586571113.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_base_dev","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_base_dev", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_base_dev| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/beomi/kcbert-base-dev \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_finetuned_en.md new file mode 100644 index 00000000000000..eab51b72428795 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_base_finetuned BertEmbeddings from eno3940 +author: John Snow Labs +name: kcbert_base_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_base_finetuned` is an English model originally trained by eno3940. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_base_finetuned_en_5.1.1_3.0_1694649289545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_base_finetuned_en_5.1.1_3.0_1694649289545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_base_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_base_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_base_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/eno3940/kcbert-base-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_ko.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_ko.md new file mode 100644 index 00000000000000..d20f4a148bf6e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean kcbert_base BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_base +date: 2023-09-13 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_base` is a Korean model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_base_ko_5.1.1_3.0_1694586700028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_base_ko_5.1.1_3.0_1694586700028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_base","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_base", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|406.2 MB| + +## References + +https://huggingface.co/beomi/kcbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_petition_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_petition_en.md new file mode 100644 index 00000000000000..e3dfe66d543d72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_base_petition_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_base_petition BertEmbeddings from Kyoungmin +author: John Snow Labs +name: kcbert_base_petition +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_base_petition` is an English model originally trained by Kyoungmin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_base_petition_en_5.1.1_3.0_1694565357198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_base_petition_en_5.1.1_3.0_1694565357198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_base_petition","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_base_petition", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_base_petition| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/Kyoungmin/kcbert-base-petition \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_dev_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_dev_en.md new file mode 100644 index 00000000000000..3231fe67c06f5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_dev_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_large_dev BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_large_dev +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_large_dev` is an English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_large_dev_en_5.1.1_3.0_1694587013172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_large_dev_en_5.1.1_3.0_1694587013172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_large_dev","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_large_dev", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_large_dev| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/beomi/kcbert-large-dev \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_en.md new file mode 100644 index 00000000000000..4750d48d788aef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_large BertEmbeddings from beomi +author: John Snow Labs +name: kcbert_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_large` is an English model originally trained by beomi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_large_en_5.1.1_3.0_1694587353127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_large_en_5.1.1_3.0_1694587353127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/beomi/kcbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_finetuned_en.md new file mode 100644 index 00000000000000..6ea04c1b9af09d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_large_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_large_finetuned BertEmbeddings from LoraBaek +author: John Snow Labs +name: kcbert_large_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_large_finetuned` is an English model originally trained by LoraBaek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_large_finetuned_en_5.1.1_3.0_1694644868795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_large_finetuned_en_5.1.1_3.0_1694644868795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_large_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_large_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_large_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/LoraBaek/kcbert-large-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kcbert_mlm_finetune_en.md b/docs/_posts/ahmedlone127/2023-09-13-kcbert_mlm_finetune_en.md new file mode 100644 index 00000000000000..ab9f10ea0b1bb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kcbert_mlm_finetune_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kcbert_mlm_finetune BertEmbeddings from stresscaptor +author: John Snow Labs +name: kcbert_mlm_finetune +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kcbert_mlm_finetune` is an English model originally trained by stresscaptor. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kcbert_mlm_finetune_en_5.1.1_3.0_1694567744835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kcbert_mlm_finetune_en_5.1.1_3.0_1694567744835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kcbert_mlm_finetune","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kcbert_mlm_finetune", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kcbert_mlm_finetune| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.4 MB| + +## References + +https://huggingface.co/stresscaptor/kcbert-mlm-finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweets_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweets_en.md new file mode 100644 index 00000000000000..1957d090be629a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweets_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_large_finetuned_kintweets BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_large_finetuned_kintweets +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_large_finetuned_kintweets` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweets_en_5.1.1_3.0_1694608062216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweets_en_5.1.1_3.0_1694608062216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_large_finetuned_kintweets","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_large_finetuned_kintweets", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_large_finetuned_kintweets| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-large-finetuned-kintweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsa_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsa_en.md new file mode 100644 index 00000000000000..4404452dfc8da6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_large_finetuned_kintweetsa BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_large_finetuned_kintweetsa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_large_finetuned_kintweetsa` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsa_en_5.1.1_3.0_1694606829948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsa_en_5.1.1_3.0_1694606829948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_large_finetuned_kintweetsa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_large_finetuned_kintweetsa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_large_finetuned_kintweetsa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-large-finetuned-kintweetsA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsb_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsb_en.md new file mode 100644 index 00000000000000..0d6d296e28483b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_large_finetuned_kintweetsb BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_large_finetuned_kintweetsb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_large_finetuned_kintweetsb` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsb_en_5.1.1_3.0_1694596216029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsb_en_5.1.1_3.0_1694596216029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_large_finetuned_kintweetsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_large_finetuned_kintweetsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_large_finetuned_kintweetsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-large-finetuned-kintweetsB \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsc_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsc_en.md new file mode 100644 index 00000000000000..67edcdababab77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_large_finetuned_kintweetsc BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_large_finetuned_kintweetsc +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_large_finetuned_kintweetsc` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsc_en_5.1.1_3.0_1694598865169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsc_en_5.1.1_3.0_1694598865169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_large_finetuned_kintweetsc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_large_finetuned_kintweetsc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_large_finetuned_kintweetsc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-large-finetuned-kintweetsC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsd_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsd_en.md new file mode 100644 index 00000000000000..b0f764b70467d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_large_finetuned_kintweetsd_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_large_finetuned_kintweetsd BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_large_finetuned_kintweetsd +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_large_finetuned_kintweetsd` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsd_en_5.1.1_3.0_1694601566724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_large_finetuned_kintweetsd_en_5.1.1_3.0_1694601566724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_large_finetuned_kintweetsd","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_large_finetuned_kintweetsd", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_large_finetuned_kintweetsd| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-large-finetuned-kintweetsD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweets_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweets_en.md new file mode 100644 index 00000000000000..e5304d97df0562 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweets_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_small_finetuned_kintweets BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_small_finetuned_kintweets +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_small_finetuned_kintweets` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweets_en_5.1.1_3.0_1694607452346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweets_en_5.1.1_3.0_1694607452346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_small_finetuned_kintweets","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_small_finetuned_kintweets", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_small_finetuned_kintweets| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-small-finetuned-kintweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsa_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsa_en.md new file mode 100644 index 00000000000000..c71c91a278f7a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_small_finetuned_kintweetsa BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_small_finetuned_kintweetsa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_small_finetuned_kintweetsa` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsa_en_5.1.1_3.0_1694606413895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsa_en_5.1.1_3.0_1694606413895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_small_finetuned_kintweetsa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_small_finetuned_kintweetsa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_small_finetuned_kintweetsa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-small-finetuned-kintweetsA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsb_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsb_en.md new file mode 100644 index 00000000000000..3763017395ce77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_small_finetuned_kintweetsb BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_small_finetuned_kintweetsb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_small_finetuned_kintweetsb` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsb_en_5.1.1_3.0_1694595713007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsb_en_5.1.1_3.0_1694595713007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_small_finetuned_kintweetsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_small_finetuned_kintweetsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_small_finetuned_kintweetsb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-small-finetuned-kintweetsB \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsc_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsc_en.md new file mode 100644 index 00000000000000..a6c98aa2fccbba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_small_finetuned_kintweetsc BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_small_finetuned_kintweetsc +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_small_finetuned_kintweetsc` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsc_en_5.1.1_3.0_1694598449540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsc_en_5.1.1_3.0_1694598449540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_small_finetuned_kintweetsc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_small_finetuned_kintweetsc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_small_finetuned_kintweetsc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-small-finetuned-kintweetsC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsd_en.md b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsd_en.md new file mode 100644 index 00000000000000..b44ed59ea42313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kinyabert_small_finetuned_kintweetsd_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kinyabert_small_finetuned_kintweetsd BertEmbeddings from RogerB +author: John Snow Labs +name: kinyabert_small_finetuned_kintweetsd +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kinyabert_small_finetuned_kintweetsd` is an English model originally trained by RogerB. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsd_en_5.1.1_3.0_1694601166540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kinyabert_small_finetuned_kintweetsd_en_5.1.1_3.0_1694601166540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kinyabert_small_finetuned_kintweetsd","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kinyabert_small_finetuned_kintweetsd", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kinyabert_small_finetuned_kintweetsd| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/RogerB/KinyaBERT-small-finetuned-kintweetsD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-klue_base_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-klue_base_finetuned_en.md new file mode 100644 index 00000000000000..b8032d3fd51c7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-klue_base_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English klue_base_finetuned BertEmbeddings from eno3940 +author: John Snow Labs +name: klue_base_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `klue_base_finetuned` is an English model originally trained by eno3940. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/klue_base_finetuned_en_5.1.1_3.0_1694646857491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/klue_base_finetuned_en_5.1.1_3.0_1694646857491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("klue_base_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("klue_base_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|klue_base_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/eno3940/klue-base-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-klue_bert_epoch3_en.md b/docs/_posts/ahmedlone127/2023-09-13-klue_bert_epoch3_en.md new file mode 100644 index 00000000000000..bd821e3e444e85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-klue_bert_epoch3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English klue_bert_epoch3 BertEmbeddings from eno3940 +author: John Snow Labs +name: klue_bert_epoch3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `klue_bert_epoch3` is an English model originally trained by eno3940. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/klue_bert_epoch3_en_5.1.1_3.0_1694648970288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/klue_bert_epoch3_en_5.1.1_3.0_1694648970288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("klue_bert_epoch3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("klue_bert_epoch3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|klue_bert_epoch3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/eno3940/klue-bert-epoch3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-knowbias_bert_base_uncased_race_en.md b/docs/_posts/ahmedlone127/2023-09-13-knowbias_bert_base_uncased_race_en.md new file mode 100644 index 00000000000000..63fc96b6125e6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-knowbias_bert_base_uncased_race_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English knowbias_bert_base_uncased_race BertEmbeddings from squiduu +author: John Snow Labs +name: knowbias_bert_base_uncased_race +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `knowbias_bert_base_uncased_race` is an English model originally trained by squiduu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/knowbias_bert_base_uncased_race_en_5.1.1_3.0_1694582004965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/knowbias_bert_base_uncased_race_en_5.1.1_3.0_1694582004965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("knowbias_bert_base_uncased_race","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("knowbias_bert_base_uncased_race", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|knowbias_bert_base_uncased_race| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/squiduu/knowbias-bert-base-uncased-race \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-kpfbert_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-kpfbert_base_en.md new file mode 100644 index 00000000000000..5f84ee9050008c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-kpfbert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kpfbert_base BertEmbeddings from yunaissance +author: John Snow Labs +name: kpfbert_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kpfbert_base` is an English model originally trained by yunaissance. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kpfbert_base_en_5.1.1_3.0_1694584038349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kpfbert_base_en_5.1.1_3.0_1694584038349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("kpfbert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("kpfbert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kpfbert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|425.1 MB| + +## References + +https://huggingface.co/yunaissance/kpfbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ksl_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-ksl_bert_en.md new file mode 100644 index 00000000000000..02249500c2f5a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ksl_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ksl_bert BertEmbeddings from dobbytk +author: John Snow Labs +name: ksl_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ksl_bert` is an English model originally trained by dobbytk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ksl_bert_en_5.1.1_3.0_1694626325742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ksl_bert_en_5.1.1_3.0_1694626325742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ksl_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ksl_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ksl_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/dobbytk/KSL-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-labse_english_russian_erzya_v1_ru.md b/docs/_posts/ahmedlone127/2023-09-13-labse_english_russian_erzya_v1_ru.md new file mode 100644 index 00000000000000..f3fb0cffb9aa05 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-labse_english_russian_erzya_v1_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian labse_english_russian_erzya_v1 BertEmbeddings from slone +author: John Snow Labs +name: labse_english_russian_erzya_v1 +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `labse_english_russian_erzya_v1` is a Russian model originally trained by slone. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/labse_english_russian_erzya_v1_ru_5.1.1_3.0_1694583966275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/labse_english_russian_erzya_v1_ru_5.1.1_3.0_1694583966275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("labse_english_russian_erzya_v1","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("labse_english_russian_erzya_v1", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|labse_english_russian_erzya_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|535.5 MB| + +## References + +https://huggingface.co/slone/LaBSE-en-ru-myv-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lb_mbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-lb_mbert_en.md new file mode 100644 index 00000000000000..16d13c67720cb1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lb_mbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lb_mbert BertEmbeddings from lothritz +author: John Snow Labs +name: lb_mbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lb_mbert` is an English model originally trained by lothritz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lb_mbert_en_5.1.1_3.0_1694596944765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lb_mbert_en_5.1.1_3.0_1694596944765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lb_mbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lb_mbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lb_mbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|621.8 MB| + +## References + +https://huggingface.co/lothritz/Lb_mBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_cased_ptbr_pt.md b/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_cased_ptbr_pt.md new file mode 100644 index 00000000000000..5584191dc3f44e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_cased_ptbr_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese legal_bert_base_cased_ptbr BertEmbeddings from dominguesm +author: John Snow Labs +name: legal_bert_base_cased_ptbr +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bert_base_cased_ptbr` is a Portuguese model originally trained by dominguesm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bert_base_cased_ptbr_pt_5.1.1_3.0_1694591089960.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bert_base_cased_ptbr_pt_5.1.1_3.0_1694591089960.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_bert_base_cased_ptbr","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_bert_base_cased_ptbr", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bert_base_cased_ptbr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|470.0 MB| + +## References + +https://huggingface.co/dominguesm/legal-bert-base-cased-ptbr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_uncased_finetuned_rramicus_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_uncased_finetuned_rramicus_en.md new file mode 100644 index 00000000000000..29fa712a5d4e73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_bert_base_uncased_finetuned_rramicus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_bert_base_uncased_finetuned_rramicus BertEmbeddings from repro-rights-amicus-briefs +author: John Snow Labs +name: legal_bert_base_uncased_finetuned_rramicus +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bert_base_uncased_finetuned_rramicus` is an English model originally trained by repro-rights-amicus-briefs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567706023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bert_base_uncased_finetuned_rramicus_en_5.1.1_3.0_1694567706023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_bert_base_uncased_finetuned_rramicus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_bert_base_uncased_finetuned_rramicus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bert_base_uncased_finetuned_rramicus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/repro-rights-amicus-briefs/legal-bert-base-uncased-finetuned-RRamicus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_bertimbau_base_pt.md b/docs/_posts/ahmedlone127/2023-09-13-legal_bertimbau_base_pt.md new file mode 100644 index 00000000000000..28b0a0221a83b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_bertimbau_base_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese legal_bertimbau_base BertEmbeddings from rufimelo +author: John Snow Labs +name: legal_bertimbau_base +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_bertimbau_base` is a Portuguese model originally trained by rufimelo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_bertimbau_base_pt_5.1.1_3.0_1694563613858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_bertimbau_base_pt_5.1.1_3.0_1694563613858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_bertimbau_base","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_bertimbau_base", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_bertimbau_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.8 MB| + +## References + +https://huggingface.co/rufimelo/Legal-BERTimbau-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_hebert_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_hebert_en.md new file mode 100644 index 00000000000000..508141215a8c84 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_hebert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_hebert BertEmbeddings from avichr +author: John Snow Labs +name: legal_hebert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_hebert` is an English model originally trained by avichr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_hebert_en_5.1.1_3.0_1694641249619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_hebert_en_5.1.1_3.0_1694641249619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_hebert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_hebert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_hebert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|462.5 MB| + +## References + +https://huggingface.co/avichr/Legal-heBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_en.md new file mode 100644 index 00000000000000..40968056fea7e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_indobert_pytorch BertEmbeddings from kapanjagocoding +author: John Snow Labs +name: legal_indobert_pytorch +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_indobert_pytorch` is an English model originally trained by kapanjagocoding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_en_5.1.1_3.0_1694636800311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_en_5.1.1_3.0_1694636800311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_indobert_pytorch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_indobert_pytorch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_indobert_pytorch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/kapanjagocoding/legal-indobert-pytorch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v1_en.md new file mode 100644 index 00000000000000..446b310aff75d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_indobert_pytorch_v1 BertEmbeddings from kapanjagocoding +author: John Snow Labs +name: legal_indobert_pytorch_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_indobert_pytorch_v1` is an English model originally trained by kapanjagocoding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v1_en_5.1.1_3.0_1694637779586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v1_en_5.1.1_3.0_1694637779586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_indobert_pytorch_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_indobert_pytorch_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_indobert_pytorch_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/kapanjagocoding/legal-indobert-pytorch-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v2_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v2_en.md new file mode 100644 index 00000000000000..dca64da8b73945 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_indobert_pytorch_v2 BertEmbeddings from kapanjagocoding +author: John Snow Labs +name: legal_indobert_pytorch_v2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_indobert_pytorch_v2` is an English model originally trained by kapanjagocoding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v2_en_5.1.1_3.0_1694637348680.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v2_en_5.1.1_3.0_1694637348680.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_indobert_pytorch_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_indobert_pytorch_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_indobert_pytorch_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/kapanjagocoding/legal-indobert-pytorch-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v3_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v3_en.md new file mode 100644 index 00000000000000..05faa53af7c6ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_indobert_pytorch_v3 BertEmbeddings from kapanjagocoding +author: John Snow Labs +name: legal_indobert_pytorch_v3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_indobert_pytorch_v3` is an English model originally trained by kapanjagocoding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v3_en_5.1.1_3.0_1694638150002.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v3_en_5.1.1_3.0_1694638150002.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_indobert_pytorch_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_indobert_pytorch_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_indobert_pytorch_v3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/kapanjagocoding/legal-indobert-pytorch-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v4_en.md b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v4_en.md new file mode 100644 index 00000000000000..3c192c002de763 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legal_indobert_pytorch_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_indobert_pytorch_v4 BertEmbeddings from kapanjagocoding +author: John Snow Labs +name: legal_indobert_pytorch_v4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_indobert_pytorch_v4` is an English model originally trained by kapanjagocoding. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v4_en_5.1.1_3.0_1694643325435.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_indobert_pytorch_v4_en_5.1.1_3.0_1694643325435.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legal_indobert_pytorch_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legal_indobert_pytorch_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_indobert_pytorch_v4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/kapanjagocoding/legal-indobert-pytorch-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legalbert_adept_en.md b/docs/_posts/ahmedlone127/2023-09-13-legalbert_adept_en.md new file mode 100644 index 00000000000000..30fc217e2ac076 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legalbert_adept_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbert_adept BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: legalbert_adept +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbert_adept` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbert_adept_en_5.1.1_3.0_1694567799021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbert_adept_en_5.1.1_3.0_1694567799021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legalbert_adept","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legalbert_adept", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbert_adept| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hatemestinbejaia/legalbert-adept \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-legalnlp_bert_pt.md b/docs/_posts/ahmedlone127/2023-09-13-legalnlp_bert_pt.md new file mode 100644 index 00000000000000..ac59b8243b994d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-legalnlp_bert_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese legalnlp_bert BertEmbeddings from felipemaiapolo +author: John Snow Labs +name: legalnlp_bert +date: 2023-09-13 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalnlp_bert` is a Portuguese model originally trained by felipemaiapolo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalnlp_bert_pt_5.1.1_3.0_1694580772789.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalnlp_bert_pt_5.1.1_3.0_1694580772789.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("legalnlp_bert","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("legalnlp_bert", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalnlp_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.8 MB| + +## References + +https://huggingface.co/felipemaiapolo/legalnlp-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lernnavibert_en.md b/docs/_posts/ahmedlone127/2023-09-13-lernnavibert_en.md new file mode 100644 index 00000000000000..e5b80355eb7be6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lernnavibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lernnavibert BertEmbeddings from lucazed +author: John Snow Labs +name: lernnavibert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lernnavibert` is an English model originally trained by lucazed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lernnavibert_en_5.1.1_3.0_1694576101007.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lernnavibert_en_5.1.1_3.0_1694576101007.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lernnavibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lernnavibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lernnavibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/lucazed/LernnaviBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lesssexistbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-lesssexistbert_en.md new file mode 100644 index 00000000000000..b24b0324e69c1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lesssexistbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lesssexistbert BertEmbeddings from clincolnoz +author: John Snow Labs +name: lesssexistbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lesssexistbert` is an English model originally trained by clincolnoz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lesssexistbert_en_5.1.1_3.0_1694578655498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lesssexistbert_en_5.1.1_3.0_1694578655498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lesssexistbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lesssexistbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lesssexistbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/clincolnoz/LessSexistBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lexbert_turkish_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-lexbert_turkish_uncased_en.md new file mode 100644 index 00000000000000..f0c4f28546d29d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lexbert_turkish_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lexbert_turkish_uncased BertEmbeddings from sfurkan +author: John Snow Labs +name: lexbert_turkish_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lexbert_turkish_uncased` is an English model originally trained by sfurkan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lexbert_turkish_uncased_en_5.1.1_3.0_1694569214173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lexbert_turkish_uncased_en_5.1.1_3.0_1694569214173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lexbert_turkish_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lexbert_turkish_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lexbert_turkish_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.5 MB| + +## References + +https://huggingface.co/sfurkan/LexBERT-turkish-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lht_bert_512_en.md b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_512_en.md new file mode 100644 index 00000000000000..8e6a0c237b6dc7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_512_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lht_bert_512 BertEmbeddings from Shanny +author: John Snow Labs +name: lht_bert_512 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lht_bert_512` is an English model originally trained by Shanny. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lht_bert_512_en_5.1.1_3.0_1694635559814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lht_bert_512_en_5.1.1_3.0_1694635559814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lht_bert_512","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lht_bert_512", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lht_bert_512| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Shanny/LHT_BERT_512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lht_bert_added_tokens_en.md b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_added_tokens_en.md new file mode 100644 index 00000000000000..468f1bba828a96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_added_tokens_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lht_bert_added_tokens BertEmbeddings from Shanny +author: John Snow Labs +name: lht_bert_added_tokens +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lht_bert_added_tokens` is an English model originally trained by Shanny. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lht_bert_added_tokens_en_5.1.1_3.0_1694630512751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lht_bert_added_tokens_en_5.1.1_3.0_1694630512751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lht_bert_added_tokens","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lht_bert_added_tokens", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lht_bert_added_tokens| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Shanny/LHT_BERT_added_tokens \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lht_bert_added_words_en.md b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_added_words_en.md new file mode 100644 index 00000000000000..d52f808bf7aad1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_added_words_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lht_bert_added_words BertEmbeddings from Shanny +author: John Snow Labs +name: lht_bert_added_words +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lht_bert_added_words` is an English model originally trained by Shanny. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lht_bert_added_words_en_5.1.1_3.0_1694629369350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lht_bert_added_words_en_5.1.1_3.0_1694629369350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lht_bert_added_words","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lht_bert_added_words", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lht_bert_added_words| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Shanny/LHT_BERT_added_words \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lht_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_en.md new file mode 100644 index 00000000000000..4b569d0297cd15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lht_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lht_bert BertEmbeddings from Shanny +author: John Snow Labs +name: lht_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lht_bert` is an English model originally trained by Shanny. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lht_bert_en_5.1.1_3.0_1694621223965.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lht_bert_en_5.1.1_3.0_1694621223965.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lht_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lht_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lht_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Shanny/LHT_BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-linkbert_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-linkbert_base_en.md new file mode 100644 index 00000000000000..23ffaa7b372c74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-linkbert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English linkbert_base BertEmbeddings from michiyasunaga +author: John Snow Labs +name: linkbert_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `linkbert_base` is an English model originally trained by michiyasunaga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linkbert_base_en_5.1.1_3.0_1694606637209.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linkbert_base_en_5.1.1_3.0_1694606637209.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("linkbert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("linkbert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linkbert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/michiyasunaga/LinkBERT-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-linkbert_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-linkbert_large_en.md new file mode 100644 index 00000000000000..8c6be65db73918 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-linkbert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English linkbert_large BertEmbeddings from michiyasunaga +author: John Snow Labs +name: linkbert_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `linkbert_large` is an English model originally trained by michiyasunaga. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/linkbert_large_en_5.1.1_3.0_1694605675771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/linkbert_large_en_5.1.1_3.0_1694605675771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("linkbert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("linkbert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|linkbert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/michiyasunaga/LinkBERT-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lsg4k_italian_legal_bert_it.md b/docs/_posts/ahmedlone127/2023-09-13-lsg4k_italian_legal_bert_it.md new file mode 100644 index 00000000000000..f3839e77af29a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lsg4k_italian_legal_bert_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian lsg4k_italian_legal_bert BertEmbeddings from dlicari +author: John Snow Labs +name: lsg4k_italian_legal_bert +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lsg4k_italian_legal_bert` is an Italian model originally trained by dlicari. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg4k_italian_legal_bert_it_5.1.1_3.0_1694596094098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg4k_italian_legal_bert_it_5.1.1_3.0_1694596094098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lsg4k_italian_legal_bert","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lsg4k_italian_legal_bert", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg4k_italian_legal_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|419.2 MB| + +## References + +https://huggingface.co/dlicari/lsg4k-Italian-Legal-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lsg_bert_base_uncased_4096_en.md b/docs/_posts/ahmedlone127/2023-09-13-lsg_bert_base_uncased_4096_en.md new file mode 100644 index 00000000000000..1645fb87513f11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lsg_bert_base_uncased_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lsg_bert_base_uncased_4096 BertEmbeddings from ccdv +author: John Snow Labs +name: lsg_bert_base_uncased_4096 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lsg_bert_base_uncased_4096` is an English model originally trained by ccdv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg_bert_base_uncased_4096_en_5.1.1_3.0_1694564185857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg_bert_base_uncased_4096_en_5.1.1_3.0_1694564185857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("lsg_bert_base_uncased_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("lsg_bert_base_uncased_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg_bert_base_uncased_4096| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.5 MB| + +## References + +https://huggingface.co/ccdv/lsg-bert-base-uncased-4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lsg_legal_base_uncased_4096_en.md b/docs/_posts/ahmedlone127/2023-09-13-lsg_legal_base_uncased_4096_en.md new file mode 100644 index 00000000000000..709bbed08aba51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lsg_legal_base_uncased_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lsg_legal_base_uncased_4096 BertEmbeddings from ccdv +author: John Snow Labs +name: lsg_legal_base_uncased_4096 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lsg_legal_base_uncased_4096` is an English model originally trained by ccdv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg_legal_base_uncased_4096_en_5.1.1_3.0_1694590941145.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg_legal_base_uncased_4096_en_5.1.1_3.0_1694590941145.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("lsg_legal_base_uncased_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("lsg_legal_base_uncased_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg_legal_base_uncased_4096| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|417.6 MB| + +## References + +https://huggingface.co/ccdv/lsg-legal-base-uncased-4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-lsg_legal_small_uncased_4096_en.md b/docs/_posts/ahmedlone127/2023-09-13-lsg_legal_small_uncased_4096_en.md new file mode 100644 index 00000000000000..269581411ea0dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-lsg_legal_small_uncased_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lsg_legal_small_uncased_4096 BertEmbeddings from ccdv +author: John Snow Labs +name: lsg_legal_small_uncased_4096 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lsg_legal_small_uncased_4096` is an English model originally trained by ccdv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg_legal_small_uncased_4096_en_5.1.1_3.0_1694591070073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg_legal_small_uncased_4096_en_5.1.1_3.0_1694591070073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("lsg_legal_small_uncased_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("lsg_legal_small_uncased_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg_legal_small_uncased_4096| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|137.4 MB| + +## References + +https://huggingface.co/ccdv/lsg-legal-small-uncased-4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-luxembert_en.md b/docs/_posts/ahmedlone127/2023-09-13-luxembert_en.md new file mode 100644 index 00000000000000..390bb1ff2d62f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-luxembert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English luxembert BertEmbeddings from lothritz +author: John Snow Labs +name: luxembert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `luxembert` is an English model originally trained by lothritz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/luxembert_en_5.1.1_3.0_1694643972873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/luxembert_en_5.1.1_3.0_1694643972873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("luxembert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("luxembert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|luxembert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.7 MB| + +## References + +https://huggingface.co/lothritz/LuxemBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mabel_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-mabel_bert_base_uncased_en.md new file mode 100644 index 00000000000000..08dec8669262a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mabel_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mabel_bert_base_uncased BertEmbeddings from princeton-nlp +author: John Snow Labs +name: mabel_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mabel_bert_base_uncased` is an English model originally trained by princeton-nlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mabel_bert_base_uncased_en_5.1.1_3.0_1694611081845.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mabel_bert_base_uncased_en_5.1.1_3.0_1694611081845.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("mabel_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("mabel_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mabel_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/princeton-nlp/mabel-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mabepa_sts_es.md b/docs/_posts/ahmedlone127/2023-09-13-mabepa_sts_es.md new file mode 100644 index 00000000000000..46fd6e911ad2a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mabepa_sts_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish mabepa_sts BertEmbeddings from Brendar +author: John Snow Labs +name: mabepa_sts +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mabepa_sts` is a Castilian, Spanish model originally trained by Brendar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mabepa_sts_es_5.1.1_3.0_1694584433265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mabepa_sts_es_5.1.1_3.0_1694584433265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("mabepa_sts","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("mabepa_sts", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mabepa_sts| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.6 MB| + +## References + +https://huggingface.co/Brendar/MaBePa_STS \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-malay_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-malay_bert_en.md new file mode 100644 index 00000000000000..b8a4aa84b861b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-malay_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English malay_bert BertEmbeddings from NLP4H +author: John Snow Labs +name: malay_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `malay_bert` is an English model originally trained by NLP4H. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malay_bert_en_5.1.1_3.0_1694568227304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malay_bert_en_5.1.1_3.0_1694568227304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("malay_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("malay_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malay_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/NLP4H/ms_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-malayalam_bert_ml.md b/docs/_posts/ahmedlone127/2023-09-13-malayalam_bert_ml.md new file mode 100644 index 00000000000000..41ea31fd981233 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-malayalam_bert_ml.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Malayalam malayalam_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: malayalam_bert +date: 2023-09-13 +tags: [bert, ml, open_source, fill_mask, onnx] +task: Embeddings +language: ml +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malayalam_bert` is a Malayalam model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malayalam_bert_ml_5.1.1_3.0_1694640553943.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malayalam_bert_ml_5.1.1_3.0_1694640553943.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("malayalam_bert","ml") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("malayalam_bert", "ml") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malayalam_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ml| +|Size:|890.5 MB| + +## References + +https://huggingface.co/l3cube-pune/malayalam-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-manubert_en.md b/docs/_posts/ahmedlone127/2023-09-13-manubert_en.md new file mode 100644 index 00000000000000..d0d6b24ffcc2f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-manubert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English manubert BertEmbeddings from akumar33 +author: John Snow Labs +name: manubert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `manubert` is an English model originally trained by akumar33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/manubert_en_5.1.1_3.0_1694648045075.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/manubert_en_5.1.1_3.0_1694648045075.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("manubert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("manubert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|manubert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/akumar33/ManuBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_scratch_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_scratch_mr.md new file mode 100644 index 00000000000000..5de0ded2a4897b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_scratch_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert_scratch +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_bert_scratch` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_scratch_mr_5.1.1_3.0_1694579363809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_scratch_mr_5.1.1_3.0_1694579363809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("marathi_bert_scratch","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("marathi_bert_scratch", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|470.3 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_small_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_small_mr.md new file mode 100644 index 00000000000000..b9381b5b95cfc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_small_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert_small BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert_small +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_bert_small` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_small_mr_5.1.1_3.0_1694584448262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_small_mr_5.1.1_3.0_1694584448262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("marathi_bert_small","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("marathi_bert_small", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert_small| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|311.1 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_smaller_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_smaller_mr.md new file mode 100644 index 00000000000000..e3fad431f278b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_smaller_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert_smaller BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert_smaller +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_bert_smaller` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_smaller_mr_5.1.1_3.0_1694584572735.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_smaller_mr_5.1.1_3.0_1694584572735.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("marathi_bert_smaller","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("marathi_bert_smaller", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert_smaller| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|204.9 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert-smaller \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_v2_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_v2_mr.md new file mode 100644 index 00000000000000..f4caa9d6aa76ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_bert_v2_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_bert_v2 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_bert_v2 +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_bert_v2` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_bert_v2_mr_5.1.1_3.0_1694573439496.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_bert_v2_mr_5.1.1_3.0_1694573439496.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") # feeds the "token" input of BertEmbeddings + +embeddings = BertEmbeddings.pretrained("marathi_bert_v2","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) # data must provide the "text" column + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input used below + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") // feeds the "token" input of BertEmbeddings + +val embeddings = BertEmbeddings + .pretrained("marathi_bert_v2", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) // data must provide the "text" column + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_bert_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-bert-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_hateful_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_hateful_mr.md new file mode 100644 index 00000000000000..0be66bec861ee8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_hateful_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_tweets_bert_hateful BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_tweets_bert_hateful +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_tweets_bert_hateful` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_hateful_mr_5.1.1_3.0_1694578089840.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_hateful_mr_5.1.1_3.0_1694578089840.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_tweets_bert_hateful","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marathi_tweets_bert_hateful", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_tweets_bert_hateful| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-tweets-bert-hateful \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_mr.md new file mode 100644 index 00000000000000..018c729848ea80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_tweets_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_tweets_bert +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_tweets_bert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_mr_5.1.1_3.0_1694576608611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_mr_5.1.1_3.0_1694576608611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_tweets_bert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marathi_tweets_bert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_tweets_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-tweets-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_scratch_mr.md b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_scratch_mr.md new file mode 100644 index 00000000000000..23d5443914b7c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marathi_tweets_bert_scratch_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_tweets_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: marathi_tweets_bert_scratch +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_tweets_bert_scratch` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_scratch_mr_5.1.1_3.0_1694631845235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_tweets_bert_scratch_mr_5.1.1_3.0_1694631845235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marathi_tweets_bert_scratch","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marathi_tweets_bert_scratch", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_tweets_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|470.4 MB| + +## References + +https://huggingface.co/l3cube-pune/marathi-tweets-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marbert_adept_en.md b/docs/_posts/ahmedlone127/2023-09-13-marbert_adept_en.md new file mode 100644 index 00000000000000..a944df7dd957a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marbert_adept_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English marbert_adept BertEmbeddings from hatemestinbejaia +author: John Snow Labs +name: marbert_adept +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marbert_adept` is an English model originally trained by hatemestinbejaia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marbert_adept_en_5.1.1_3.0_1694570997383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marbert_adept_en_5.1.1_3.0_1694570997383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marbert_adept","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marbert_adept", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marbert_adept| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.7 MB| + +## References + +https://huggingface.co/hatemestinbejaia/MARBERT-adept \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marbert_ar.md b/docs/_posts/ahmedlone127/2023-09-13-marbert_ar.md new file mode 100644 index 00000000000000..1fd0a24bafa01c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marbert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic marbert BertEmbeddings from UBC-NLP +author: John Snow Labs +name: marbert +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marbert` is an Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marbert_ar_5.1.1_3.0_1694573980919.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marbert_ar_5.1.1_3.0_1694573980919.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marbert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marbert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|608.7 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-marbertv2_ar.md b/docs/_posts/ahmedlone127/2023-09-13-marbertv2_ar.md new file mode 100644 index 00000000000000..0ae0523e7cfbf1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-marbertv2_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic marbertv2 BertEmbeddings from UBC-NLP +author: John Snow Labs +name: marbertv2 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marbertv2` is an Arabic model originally trained by UBC-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marbertv2_ar_5.1.1_3.0_1694574149293.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marbertv2_ar_5.1.1_3.0_1694574149293.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("marbertv2","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("marbertv2", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marbertv2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|606.5 MB| + +## References + +https://huggingface.co/UBC-NLP/MARBERTv2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-master_base_pretrained_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-master_base_pretrained_msmarco_en.md new file mode 100644 index 00000000000000..80367d6897e501 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-master_base_pretrained_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English master_base_pretrained_msmarco BertEmbeddings from lx865712528 +author: John Snow Labs +name: master_base_pretrained_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `master_base_pretrained_msmarco` is an English model originally trained by lx865712528. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/master_base_pretrained_msmarco_en_5.1.1_3.0_1694593327779.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/master_base_pretrained_msmarco_en_5.1.1_3.0_1694593327779.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("master_base_pretrained_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("master_base_pretrained_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|master_base_pretrained_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/lx865712528/master-base-pretrained-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-master_base_pretrained_wiki_en.md b/docs/_posts/ahmedlone127/2023-09-13-master_base_pretrained_wiki_en.md new file mode 100644 index 00000000000000..e329a7007d0685 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-master_base_pretrained_wiki_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English master_base_pretrained_wiki BertEmbeddings from lx865712528 +author: John Snow Labs +name: master_base_pretrained_wiki +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `master_base_pretrained_wiki` is an English model originally trained by lx865712528. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/master_base_pretrained_wiki_en_5.1.1_3.0_1694593477244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/master_base_pretrained_wiki_en_5.1.1_3.0_1694593477244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("master_base_pretrained_wiki","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("master_base_pretrained_wiki", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|master_base_pretrained_wiki| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/lx865712528/master-base-pretrained-wiki \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mathbert_croatian_en.md b/docs/_posts/ahmedlone127/2023-09-13-mathbert_croatian_en.md new file mode 100644 index 00000000000000..5ec63e8a94fc1f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mathbert_croatian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mathbert_croatian BertEmbeddings from mpajas +author: John Snow Labs +name: mathbert_croatian +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mathbert_croatian` is an English model originally trained by mpajas. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mathbert_croatian_en_5.1.1_3.0_1694593152942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mathbert_croatian_en_5.1.1_3.0_1694593152942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mathbert_croatian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mathbert_croatian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mathbert_croatian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.4 MB| + +## References + +https://huggingface.co/mpajas/MathBERT_hr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mathbert_custom_en.md b/docs/_posts/ahmedlone127/2023-09-13-mathbert_custom_en.md new file mode 100644 index 00000000000000..54ba238f87fdf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mathbert_custom_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mathbert_custom BertEmbeddings from tbs17 +author: John Snow Labs +name: mathbert_custom +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mathbert_custom` is an English model originally trained by tbs17. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mathbert_custom_en_5.1.1_3.0_1694576775346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mathbert_custom_en_5.1.1_3.0_1694576775346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mathbert_custom","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mathbert_custom", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mathbert_custom| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/tbs17/MathBERT-custom \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mattpuscibert_en.md b/docs/_posts/ahmedlone127/2023-09-13-mattpuscibert_en.md new file mode 100644 index 00000000000000..cb88cc1b9c9b72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mattpuscibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mattpuscibert BertEmbeddings from lfoppiano +author: John Snow Labs +name: mattpuscibert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mattpuscibert` is an English model originally trained by lfoppiano. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mattpuscibert_en_5.1.1_3.0_1694586067878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mattpuscibert_en_5.1.1_3.0_1694586067878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mattpuscibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mattpuscibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mattpuscibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/lfoppiano/MatTPUSciBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbert_deen_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbert_deen_en.md new file mode 100644 index 00000000000000..62c910323cef2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbert_deen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_deen BertEmbeddings from miugod +author: John Snow Labs +name: mbert_deen +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_deen` is an English model originally trained by miugod. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_deen_en_5.1.1_3.0_1694644088821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_deen_en_5.1.1_3.0_1694644088821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_deen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_deen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_deen| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.1 MB| + +## References + +https://huggingface.co/miugod/mbert_deen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbert_finetuned_pytorch_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbert_finetuned_pytorch_en.md new file mode 100644 index 00000000000000..063c04f6f55d97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbert_finetuned_pytorch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_finetuned_pytorch BertEmbeddings from fimu-docproc-research +author: John Snow Labs +name: mbert_finetuned_pytorch +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_finetuned_pytorch` is an English model originally trained by fimu-docproc-research. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_finetuned_pytorch_en_5.1.1_3.0_1694583639590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_finetuned_pytorch_en_5.1.1_3.0_1694583639590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_finetuned_pytorch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_finetuned_pytorch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_finetuned_pytorch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/fimu-docproc-research/mbert-finetuned-pytorch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbert_rom_arabic_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbert_rom_arabic_en.md new file mode 100644 index 00000000000000..7b8139e8801249 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbert_rom_arabic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_rom_arabic BertEmbeddings from Zappandy +author: John Snow Labs +name: mbert_rom_arabic +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbert_rom_arabic` is an English model originally trained by Zappandy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_rom_arabic_en_5.1.1_3.0_1694576557333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_rom_arabic_en_5.1.1_3.0_1694576557333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbert_rom_arabic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbert_rom_arabic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_rom_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/Zappandy/mBERT-rom-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbertu_arabic_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbertu_arabic_en.md new file mode 100644 index 00000000000000..34acc71ebced77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbertu_arabic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbertu_arabic BertEmbeddings from Zappandy +author: John Snow Labs +name: mbertu_arabic +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbertu_arabic` is an English model originally trained by Zappandy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbertu_arabic_en_5.1.1_3.0_1694576350158.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbertu_arabic_en_5.1.1_3.0_1694576350158.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbertu_arabic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbertu_arabic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbertu_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/Zappandy/mBERTu-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbertu_mt.md b/docs/_posts/ahmedlone127/2023-09-13-mbertu_mt.md new file mode 100644 index 00000000000000..dbdf2ab7dbdd6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbertu_mt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Maltese mbertu BertEmbeddings from MLRS +author: John Snow Labs +name: mbertu +date: 2023-09-13 +tags: [bert, mt, open_source, fill_mask, onnx] +task: Embeddings +language: mt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbertu` is a Maltese model originally trained by MLRS. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbertu_mt_5.1.1_3.0_1694635757980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbertu_mt_5.1.1_3.0_1694635757980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbertu","mt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbertu", "mt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbertu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mt| +|Size:|664.5 MB| + +## References + +https://huggingface.co/MLRS/mBERTu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mbertv2.0_en.md b/docs/_posts/ahmedlone127/2023-09-13-mbertv2.0_en.md new file mode 100644 index 00000000000000..3086268a0d014c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mbertv2.0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbertv2.0 BertEmbeddings from bongsoo +author: John Snow Labs +name: mbertv2.0 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mbertv2.0` is an English model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbertv2.0_en_5.1.1_3.0_1694583366759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbertv2.0_en_5.1.1_3.0_1694583366759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mbertv2.0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mbertv2.0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbertv2.0| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|753.1 MB| + +## References + +https://huggingface.co/bongsoo/mbertV2.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-me_bert_mixed_mr.md b/docs/_posts/ahmedlone127/2023-09-13-me_bert_mixed_mr.md new file mode 100644 index 00000000000000..08a957310142ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-me_bert_mixed_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi me_bert_mixed BertEmbeddings from l3cube-pune +author: John Snow Labs +name: me_bert_mixed +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`me_bert_mixed` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/me_bert_mixed_mr_5.1.1_3.0_1694647549621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/me_bert_mixed_mr_5.1.1_3.0_1694647549621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("me_bert_mixed","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("me_bert_mixed", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|me_bert_mixed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|665.0 MB| + +## References + +https://huggingface.co/l3cube-pune/me-bert-mixed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-me_bert_mixed_v2_mr.md b/docs/_posts/ahmedlone127/2023-09-13-me_bert_mixed_v2_mr.md new file mode 100644 index 00000000000000..210148a91de4eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-me_bert_mixed_v2_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi me_bert_mixed_v2 BertEmbeddings from l3cube-pune +author: John Snow Labs +name: me_bert_mixed_v2 +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`me_bert_mixed_v2` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/me_bert_mixed_v2_mr_5.1.1_3.0_1694591075469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/me_bert_mixed_v2_mr_5.1.1_3.0_1694591075469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("me_bert_mixed_v2","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("me_bert_mixed_v2", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|me_bert_mixed_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|890.6 MB| + +## References + +https://huggingface.co/l3cube-pune/me-bert-mixed-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-me_bert_mr.md b/docs/_posts/ahmedlone127/2023-09-13-me_bert_mr.md new file mode 100644 index 00000000000000..c2e61fe5bcacd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-me_bert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi me_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: me_bert +date: 2023-09-13 +tags: [bert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`me_bert` is a Marathi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/me_bert_mr_5.1.1_3.0_1694646862410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/me_bert_mr_5.1.1_3.0_1694646862410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("me_bert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("me_bert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|me_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|407.2 MB| + +## References + +https://huggingface.co/l3cube-pune/me-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-meda_bert_da.md b/docs/_posts/ahmedlone127/2023-09-13-meda_bert_da.md new file mode 100644 index 00000000000000..1e0be1d1404606 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-meda_bert_da.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Danish meda_bert BertEmbeddings from jannikskytt +author: John Snow Labs +name: meda_bert +date: 2023-09-13 +tags: [bert, da, open_source, fill_mask, onnx] +task: Embeddings +language: da +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`meda_bert` is a Danish model originally trained by jannikskytt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/meda_bert_da_5.1.1_3.0_1694592472319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/meda_bert_da_5.1.1_3.0_1694592472319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("meda_bert","da") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("meda_bert", "da") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|meda_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|da| +|Size:|412.3 MB| + +## References + +https://huggingface.co/jannikskytt/MeDa-Bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-medbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-medbert_en.md new file mode 100644 index 00000000000000..1e5cd957cfa4a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-medbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English medbert BertEmbeddings from Charangan +author: John Snow Labs +name: medbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medbert` is an English model originally trained by Charangan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medbert_en_5.1.1_3.0_1694585151622.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medbert_en_5.1.1_3.0_1694585151622.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("medbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("medbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/Charangan/MedBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-medical_bio_bert2_en.md b/docs/_posts/ahmedlone127/2023-09-13-medical_bio_bert2_en.md new file mode 100644 index 00000000000000..ef12b2cfe94345 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-medical_bio_bert2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English medical_bio_bert2 BertEmbeddings from fspanda +author: John Snow Labs +name: medical_bio_bert2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medical_bio_bert2` is an English model originally trained by fspanda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medical_bio_bert2_en_5.1.1_3.0_1694647435608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medical_bio_bert2_en_5.1.1_3.0_1694647435608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("medical_bio_bert2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("medical_bio_bert2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medical_bio_bert2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/fspanda/Medical-Bio-BERT2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-melayubert_ms.md b/docs/_posts/ahmedlone127/2023-09-13-melayubert_ms.md new file mode 100644 index 00000000000000..482af23699300b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-melayubert_ms.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Malay (macrolanguage) melayubert BertEmbeddings from StevenLimcorn +author: John Snow Labs +name: melayubert +date: 2023-09-13 +tags: [bert, ms, open_source, fill_mask, onnx] +task: Embeddings +language: ms +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`melayubert` is a Malay (macrolanguage) model originally trained by StevenLimcorn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/melayubert_ms_5.1.1_3.0_1694571651129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/melayubert_ms_5.1.1_3.0_1694571651129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("melayubert","ms") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("melayubert", "ms") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|melayubert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ms| +|Size:|408.1 MB| + +## References + +https://huggingface.co/StevenLimcorn/MelayuBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-metaphor_finetuned_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-metaphor_finetuned_bert_en.md new file mode 100644 index 00000000000000..f67dea2a490de9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-metaphor_finetuned_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English metaphor_finetuned_bert BertEmbeddings from kangela +author: John Snow Labs +name: metaphor_finetuned_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`metaphor_finetuned_bert` is an English model originally trained by kangela. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/metaphor_finetuned_bert_en_5.1.1_3.0_1694633662658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/metaphor_finetuned_bert_en_5.1.1_3.0_1694633662658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("metaphor_finetuned_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("metaphor_finetuned_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|metaphor_finetuned_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/kangela/Metaphor-FineTuned-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-minialbert_128_en.md b/docs/_posts/ahmedlone127/2023-09-13-minialbert_128_en.md new file mode 100644 index 00000000000000..9947e5777491b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-minialbert_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minialbert_128 BertEmbeddings from nlpie +author: John Snow Labs +name: minialbert_128 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`minialbert_128` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minialbert_128_en_5.1.1_3.0_1694567917349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minialbert_128_en_5.1.1_3.0_1694567917349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("minialbert_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minialbert_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minialbert_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.0 MB| + +## References + +https://huggingface.co/nlpie/miniALBERT-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-minilm_l6_h384_uncased_eli5_en.md b/docs/_posts/ahmedlone127/2023-09-13-minilm_l6_h384_uncased_eli5_en.md new file mode 100644 index 00000000000000..b69ab1d7fbadcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-minilm_l6_h384_uncased_eli5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English minilm_l6_h384_uncased_eli5 BertEmbeddings from JackWolfard +author: John Snow Labs +name: minilm_l6_h384_uncased_eli5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `minilm_l6_h384_uncased_eli5` is an English model originally trained by JackWolfard. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/minilm_l6_h384_uncased_eli5_en_5.1.1_3.0_1694583345340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/minilm_l6_h384_uncased_eli5_en_5.1.1_3.0_1694583345340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("minilm_l6_h384_uncased_eli5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("minilm_l6_h384_uncased_eli5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|minilm_l6_h384_uncased_eli5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|84.6 MB| + +## References + +https://huggingface.co/JackWolfard/minilm-l6-h384-uncased-eli5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230403_001_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230403_001_1_en.md new file mode 100644 index 00000000000000..1dda6b2a55d6e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230403_001_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230403_001_1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230403_001_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230403_001_1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230403_001_1_en_5.1.1_3.0_1694605119094.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230403_001_1_en_5.1.1_3.0_1694605119094.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230403_001_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230403_001_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230403_001_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230403-001-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230403_002_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230403_002_1_en.md new file mode 100644 index 00000000000000..122eade6c26d81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230403_002_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230403_002_1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230403_002_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230403_002_1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230403_002_1_en_5.1.1_3.0_1694604236712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230403_002_1_en_5.1.1_3.0_1694604236712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230403_002_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230403_002_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230403_002_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230403-002-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230404_001_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230404_001_2_en.md new file mode 100644 index 00000000000000..a29c0705353232 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230404_001_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230404_001_2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230404_001_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230404_001_2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230404_001_2_en_5.1.1_3.0_1694608777175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230404_001_2_en_5.1.1_3.0_1694608777175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230404_001_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230404_001_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230404_001_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230404-001-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230404_002_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230404_002_1_en.md new file mode 100644 index 00000000000000..8c5b0f1c67eb01 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230404_002_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230404_002_1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230404_002_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230404_002_1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230404_002_1_en_5.1.1_3.0_1694610217733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230404_002_1_en_5.1.1_3.0_1694610217733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230404_002_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230404_002_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230404_002_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230404-002-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_2_en.md new file mode 100644 index 00000000000000..3a0525308de1db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230405_002_2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230405_002_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230405_002_2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230405_002_2_en_5.1.1_3.0_1694611265207.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230405_002_2_en_5.1.1_3.0_1694611265207.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230405_002_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230405_002_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230405_002_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230405-002-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_3_en.md new file mode 100644 index 00000000000000..e5a6ceb1a506fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230405_002_3 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230405_002_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230405_002_3` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230405_002_3_en_5.1.1_3.0_1694612950326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230405_002_3_en_5.1.1_3.0_1694612950326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230405_002_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230405_002_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230405_002_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230405-002-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_4_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_4_en.md new file mode 100644 index 00000000000000..67725b1e5c7263 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230405_002_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230405_002_4 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230405_002_4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230405_002_4` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230405_002_4_en_5.1.1_3.0_1694614086684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230405_002_4_en_5.1.1_3.0_1694614086684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230405_002_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230405_002_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230405_002_4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230405-002-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_20230406_002_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230406_002_5_en.md new file mode 100644 index 00000000000000..443a54d364e608 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_20230406_002_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230406_002_5 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230406_002_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230406_002_5` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230406_002_5_en_5.1.1_3.0_1694614626559.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230406_002_5_en_5.1.1_3.0_1694614626559.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_20230406_002_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230406_002_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230406_002_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.3 MB| + +## References + +https://huggingface.co/intanm/mlm-20230406-002-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mlm_gh_issues_en.md b/docs/_posts/ahmedlone127/2023-09-13-mlm_gh_issues_en.md new file mode 100644 index 00000000000000..a9ed8a4ce17be7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mlm_gh_issues_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_gh_issues BertEmbeddings from ericntay +author: John Snow Labs +name: mlm_gh_issues +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_gh_issues` is an English model originally trained by ericntay. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_gh_issues_en_5.1.1_3.0_1694577513579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_gh_issues_en_5.1.1_3.0_1694577513579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mlm_gh_issues","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_gh_issues", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_gh_issues| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ericntay/mlm_gh_issues \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model1_en.md b/docs/_posts/ahmedlone127/2023-09-13-model1_en.md new file mode 100644 index 00000000000000..b4e45db0bc3ce5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model1 BertEmbeddings from flymushroom +author: John Snow Labs +name: model1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model1` is an English model originally trained by flymushroom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model1_en_5.1.1_3.0_1694646795523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model1_en_5.1.1_3.0_1694646795523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("model1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/flymushroom/model1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model_65000_20ep_en.md b/docs/_posts/ahmedlone127/2023-09-13-model_65000_20ep_en.md new file mode 100644 index 00000000000000..1ab4e5fa53572f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model_65000_20ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_65000_20ep BertEmbeddings from sergiyvl +author: John Snow Labs +name: model_65000_20ep +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_65000_20ep` is an English model originally trained by sergiyvl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_65000_20ep_en_5.1.1_3.0_1694573722433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_65000_20ep_en_5.1.1_3.0_1694573722433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("model_65000_20ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_65000_20ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_65000_20ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.5 MB| + +## References + +https://huggingface.co/sergiyvl/model_65000_20ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model_ankai_en.md b/docs/_posts/ahmedlone127/2023-09-13-model_ankai_en.md new file mode 100644 index 00000000000000..0474e5a33c124b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model_ankai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_ankai BertEmbeddings from wudi7758521521 +author: John Snow Labs +name: model_ankai +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_ankai` is an English model originally trained by wudi7758521521. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_ankai_en_5.1.1_3.0_1694586066096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_ankai_en_5.1.1_3.0_1694586066096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("model_ankai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_ankai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_ankai| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wudi7758521521/model_ankai \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-model_bangla_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-model_bangla_bert_en.md new file mode 100644 index 00000000000000..29a3dc0f14d069 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-model_bangla_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_bangla_bert BertEmbeddings from Kowsher +author: John Snow Labs +name: model_bangla_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_bangla_bert` is an English model originally trained by Kowsher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_bangla_bert_en_5.1.1_3.0_1694565028072.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_bangla_bert_en_5.1.1_3.0_1694565028072.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("model_bangla_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_bangla_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_bangla_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|612.1 MB| + +## References + +https://huggingface.co/Kowsher/model-bangla-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-models_en.md b/docs/_posts/ahmedlone127/2023-09-13-models_en.md new file mode 100644 index 00000000000000..1d5b8802c96303 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-models_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English models BertEmbeddings from Dinithi +author: John Snow Labs +name: models +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `models` is an English model originally trained by Dinithi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/models_en_5.1.1_3.0_1694589815357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/models_en_5.1.1_3.0_1694589815357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("models","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("models", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|models| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/Dinithi/models \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-moresexistbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-moresexistbert_en.md new file mode 100644 index 00000000000000..15e51bcc31404a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-moresexistbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English moresexistbert BertEmbeddings from clincolnoz +author: John Snow Labs +name: moresexistbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `moresexistbert` is an English model originally trained by clincolnoz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/moresexistbert_en_5.1.1_3.0_1694577057922.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/moresexistbert_en_5.1.1_3.0_1694577057922.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("moresexistbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("moresexistbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|moresexistbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|404.7 MB| + +## References + +https://huggingface.co/clincolnoz/MoreSexistBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-morrbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-morrbert_en.md new file mode 100644 index 00000000000000..d753991de415db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-morrbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English morrbert BertEmbeddings from otmangi +author: John Snow Labs +name: morrbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `morrbert` is an English model originally trained by otmangi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/morrbert_en_5.1.1_3.0_1694580636063.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/morrbert_en_5.1.1_3.0_1694580636063.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("morrbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("morrbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|morrbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|469.9 MB| + +## References + +https://huggingface.co/otmangi/MorrBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mrf_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-mrf_bert_en.md new file mode 100644 index 00000000000000..fc040d7feadd6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mrf_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mrf_bert BertEmbeddings from petrichorRainbow +author: John Snow Labs +name: mrf_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mrf_bert` is an English model originally trained by petrichorRainbow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mrf_bert_en_5.1.1_3.0_1694608052096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mrf_bert_en_5.1.1_3.0_1694608052096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mrf_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mrf_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mrf_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/petrichorRainbow/mrf-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-multi_dialect_bert_base_arabic_ar.md b/docs/_posts/ahmedlone127/2023-09-13-multi_dialect_bert_base_arabic_ar.md new file mode 100644 index 00000000000000..135eaa9b58672b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-multi_dialect_bert_base_arabic_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic multi_dialect_bert_base_arabic BertEmbeddings from bashar-talafha +author: John Snow Labs +name: multi_dialect_bert_base_arabic +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `multi_dialect_bert_base_arabic` is an Arabic model originally trained by bashar-talafha. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multi_dialect_bert_base_arabic_ar_5.1.1_3.0_1694585102562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multi_dialect_bert_base_arabic_ar_5.1.1_3.0_1694585102562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("multi_dialect_bert_base_arabic","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("multi_dialect_bert_base_arabic", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multi_dialect_bert_base_arabic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|411.5 MB| + +## References + +https://huggingface.co/bashar-talafha/multi-dialect-bert-base-arabic \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-muril_with_mlm_cased_temp_en.md b/docs/_posts/ahmedlone127/2023-09-13-muril_with_mlm_cased_temp_en.md new file mode 100644 index 00000000000000..bed1b3b903c55e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-muril_with_mlm_cased_temp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English muril_with_mlm_cased_temp BertEmbeddings from simran-kh +author: John Snow Labs +name: muril_with_mlm_cased_temp +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `muril_with_mlm_cased_temp` is an English model originally trained by simran-kh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/muril_with_mlm_cased_temp_en_5.1.1_3.0_1694574360108.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/muril_with_mlm_cased_temp_en_5.1.1_3.0_1694574360108.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("muril_with_mlm_cased_temp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("muril_with_mlm_cased_temp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|muril_with_mlm_cased_temp| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|890.4 MB| + +## References + +https://huggingface.co/simran-kh/muril-with-mlm-cased-temp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mwp_bert_english_en.md b/docs/_posts/ahmedlone127/2023-09-13-mwp_bert_english_en.md new file mode 100644 index 00000000000000..a773bb355fd881 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mwp_bert_english_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mwp_bert_english BertEmbeddings from invokerliang +author: John Snow Labs +name: mwp_bert_english +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mwp_bert_english` is an English model originally trained by invokerliang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mwp_bert_english_en_5.1.1_3.0_1694575296759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mwp_bert_english_en_5.1.1_3.0_1694575296759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mwp_bert_english","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mwp_bert_english", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mwp_bert_english| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/invokerliang/MWP-BERT-en \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mybert_base_32k_en.md b/docs/_posts/ahmedlone127/2023-09-13-mybert_base_32k_en.md new file mode 100644 index 00000000000000..1a251c986969af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mybert_base_32k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mybert_base_32k BertEmbeddings from maveriq +author: John Snow Labs +name: mybert_base_32k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mybert_base_32k` is an English model originally trained by maveriq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mybert_base_32k_en_5.1.1_3.0_1694629972484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mybert_base_32k_en_5.1.1_3.0_1694629972484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mybert_base_32k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mybert_base_32k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mybert_base_32k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/maveriq/mybert-base-32k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_172k_en.md b/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_172k_en.md new file mode 100644 index 00000000000000..ca9d03fc99c778 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_172k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mybert_mini_172k BertEmbeddings from maveriq +author: John Snow Labs +name: mybert_mini_172k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mybert_mini_172k` is an English model originally trained by maveriq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mybert_mini_172k_en_5.1.1_3.0_1694630527630.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mybert_mini_172k_en_5.1.1_3.0_1694630527630.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mybert_mini_172k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mybert_mini_172k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mybert_mini_172k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/maveriq/mybert-mini-172k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_1m_en.md b/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_1m_en.md new file mode 100644 index 00000000000000..b9603b419e6d4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_1m_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mybert_mini_1m BertEmbeddings from maveriq +author: John Snow Labs +name: mybert_mini_1m +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mybert_mini_1m` is an English model originally trained by maveriq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mybert_mini_1m_en_5.1.1_3.0_1694640791254.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mybert_mini_1m_en_5.1.1_3.0_1694640791254.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("mybert_mini_1m","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mybert_mini_1m", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mybert_mini_1m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/maveriq/mybert-mini-1M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_500k_en.md b/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_500k_en.md new file mode 100644 index 00000000000000..2f196e297605db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mybert_mini_500k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mybert_mini_500k BertEmbeddings from maveriq +author: John Snow Labs +name: mybert_mini_500k +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mybert_mini_500k` is an English model originally trained by maveriq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mybert_mini_500k_en_5.1.1_3.0_1694640484187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mybert_mini_500k_en_5.1.1_3.0_1694640484187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mybert_mini_500k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mybert_mini_500k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mybert_mini_500k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|42.1 MB| + +## References + +https://huggingface.co/maveriq/mybert-mini-500k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymode03_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymode03_en.md new file mode 100644 index 00000000000000..79221a5937bc9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymode03_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymode03 BertEmbeddings from wbmitcast +author: John Snow Labs +name: mymode03 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymode03` is a English model originally trained by wbmitcast. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymode03_en_5.1.1_3.0_1694583211320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymode03_en_5.1.1_3.0_1694583211320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymode03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymode03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymode03| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/mymode03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel001_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel001_en.md new file mode 100644 index 00000000000000..4af3b6e3af1de1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel001 BertEmbeddings from coiour +author: John Snow Labs +name: mymodel001 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymodel001` is a English model originally trained by coiour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel001_en_5.1.1_3.0_1694593944523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel001_en_5.1.1_3.0_1694593944523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/coiour/mymodel001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel005_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel005_en.md new file mode 100644 index 00000000000000..bb998c2ce0980e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel005_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel005 BertEmbeddings from wbmitcast +author: John Snow Labs +name: mymodel005 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymodel005` is a English model originally trained by wbmitcast. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel005_en_5.1.1_3.0_1694583366404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel005_en_5.1.1_3.0_1694583366404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel005","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel005", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel005| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/mymodel005 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel007_wbmitcast_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel007_wbmitcast_en.md new file mode 100644 index 00000000000000..181bd8341e32df --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel007_wbmitcast_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel007_wbmitcast BertEmbeddings from wbmitcast +author: John Snow Labs +name: mymodel007_wbmitcast +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymodel007_wbmitcast` is a English model originally trained by wbmitcast. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel007_wbmitcast_en_5.1.1_3.0_1694583570220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel007_wbmitcast_en_5.1.1_3.0_1694583570220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel007_wbmitcast","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel007_wbmitcast", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel007_wbmitcast| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/mymodel007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel04_wbmitcast_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel04_wbmitcast_en.md new file mode 100644 index 00000000000000..8eefda37951adf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel04_wbmitcast_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel04_wbmitcast BertEmbeddings from wbmitcast +author: John Snow Labs +name: mymodel04_wbmitcast +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymodel04_wbmitcast` is a English model originally trained by wbmitcast. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel04_wbmitcast_en_5.1.1_3.0_1694583725964.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel04_wbmitcast_en_5.1.1_3.0_1694583725964.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel04_wbmitcast","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel04_wbmitcast", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel04_wbmitcast| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/wbmitcast/mymodel04 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel1001_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel1001_en.md new file mode 100644 index 00000000000000..4f3352e6c9268a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel1001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel1001 BertEmbeddings from cwitcate +author: John Snow Labs +name: mymodel1001 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymodel1001` is a English model originally trained by cwitcate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel1001_en_5.1.1_3.0_1694594564846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel1001_en_5.1.1_3.0_1694594564846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel1001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel1001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel1001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/cwitcate/mymodel1001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-mymodel1007_en.md b/docs/_posts/ahmedlone127/2023-09-13-mymodel1007_en.md new file mode 100644 index 00000000000000..22775a1a5aa98b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-mymodel1007_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel1007 BertEmbeddings from Wilson2021 +author: John Snow Labs +name: mymodel1007 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mymodel1007` is a English model originally trained by Wilson2021. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel1007_en_5.1.1_3.0_1694574662440.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel1007_en_5.1.1_3.0_1694574662440.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel1007","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel1007", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel1007| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Wilson2021/mymodel1007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-naval_ko.md b/docs/_posts/ahmedlone127/2023-09-13-naval_ko.md new file mode 100644 index 00000000000000..bf8f8e20723e91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-naval_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean naval BertEmbeddings from rlatmddus159 +author: John Snow Labs +name: naval +date: 2023-09-13 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`naval` is a Korean model originally trained by rlatmddus159. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/naval_ko_5.1.1_3.0_1694592395863.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/naval_ko_5.1.1_3.0_1694592395863.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("naval","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("naval", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|naval| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|412.4 MB| + +## References + +https://huggingface.co/rlatmddus159/naval \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nbme_bio_clinicalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-nbme_bio_clinicalbert_en.md new file mode 100644 index 00000000000000..a8752a06674e57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nbme_bio_clinicalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nbme_bio_clinicalbert BertEmbeddings from smeoni +author: John Snow Labs +name: nbme_bio_clinicalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nbme_bio_clinicalbert` is a English model originally trained by smeoni. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nbme_bio_clinicalbert_en_5.1.1_3.0_1694632921114.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nbme_bio_clinicalbert_en_5.1.1_3.0_1694632921114.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nbme_bio_clinicalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nbme_bio_clinicalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nbme_bio_clinicalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.2 MB| + +## References + +https://huggingface.co/smeoni/nbme-Bio_ClinicalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-ncert_bio_en.md b/docs/_posts/ahmedlone127/2023-09-13-ncert_bio_en.md new file mode 100644 index 00000000000000..e028ea29f2e04d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-ncert_bio_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ncert_bio BertEmbeddings from S1d-dha-nth3 +author: John Snow Labs +name: ncert_bio +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ncert_bio` is a English model originally trained by S1d-dha-nth3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ncert_bio_en_5.1.1_3.0_1694585454986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ncert_bio_en_5.1.1_3.0_1694585454986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ncert_bio","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ncert_bio", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ncert_bio| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/S1d-dha-nth3/ncert_bio \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nepal_bhasa_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-nepal_bhasa_model_en.md new file mode 100644 index 00000000000000..c8a926d5013ba4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nepal_bhasa_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nepal_bhasa_model BertEmbeddings from swadesh7 +author: John Snow Labs +name: nepal_bhasa_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepal_bhasa_model` is a English model originally trained by swadesh7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepal_bhasa_model_en_5.1.1_3.0_1694597351777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepal_bhasa_model_en_5.1.1_3.0_1694597351777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nepal_bhasa_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepal_bhasa_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepal_bhasa_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.1 MB| + +## References + +https://huggingface.co/swadesh7/new_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nepalibert_en.md b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_en.md new file mode 100644 index 00000000000000..df8acc60b5f81c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nepalibert BertEmbeddings from Rajan +author: John Snow Labs +name: nepalibert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nepalibert` is a English model originally trained by Rajan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepalibert_en_5.1.1_3.0_1694569792967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepalibert_en_5.1.1_3.0_1694569792967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nepalibert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepalibert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepalibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|305.4 MB| + +## References + +https://huggingface.co/Rajan/NepaliBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nepalibert_ne.md b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_ne.md new file mode 100644 index 00000000000000..704887a504fc5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nepalibert_ne.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Nepali (macrolanguage) nepalibert BertEmbeddings from Shushant +author: John Snow Labs +name: nepalibert +date: 2023-09-13 +tags: [bert, ne, open_source, fill_mask, onnx] +task: Embeddings +language: ne +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nepalibert` is a Nepali (macrolanguage) model originally trained by Shushant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepalibert_ne_5.1.1_3.0_1694571473601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepalibert_ne_5.1.1_3.0_1694571473601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nepalibert","ne") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepalibert", "ne") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepalibert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ne| +|Size:|408.5 MB| + +## References + +https://huggingface.co/Shushant/nepaliBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nepnewsbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-nepnewsbert_en.md new file mode 100644 index 00000000000000..594d327855c6c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nepnewsbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nepnewsbert BertEmbeddings from Shushant +author: John Snow Labs +name: nepnewsbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nepnewsbert` is an English model originally trained by Shushant. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nepnewsbert_en_5.1.1_3.0_1694571258663.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nepnewsbert_en_5.1.1_3.0_1694571258663.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nepnewsbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nepnewsbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nepnewsbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.7 MB| + +## References + +https://huggingface.co/Shushant/NepNewsBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-netbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-netbert_en.md new file mode 100644 index 00000000000000..dd6bdc66980765 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-netbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English netbert BertEmbeddings from antoinelouis +author: John Snow Labs +name: netbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `netbert` is an English model originally trained by antoinelouis. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/netbert_en_5.1.1_3.0_1694580370077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/netbert_en_5.1.1_3.0_1694580370077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("netbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("netbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|netbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.1 MB| + +## References + +https://huggingface.co/antoinelouis/netbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-neuba_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-neuba_bert_en.md new file mode 100644 index 00000000000000..b2a00258eba302 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-neuba_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English neuba_bert BertEmbeddings from thunlp +author: John Snow Labs +name: neuba_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `neuba_bert` is an English model originally trained by thunlp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/neuba_bert_en_5.1.1_3.0_1694577955856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/neuba_bert_en_5.1.1_3.0_1694577955856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("neuba_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("neuba_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|neuba_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/thunlp/neuba-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-newmodel_en.md b/docs/_posts/ahmedlone127/2023-09-13-newmodel_en.md new file mode 100644 index 00000000000000..ba37aa9589d2a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-newmodel_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English newmodel BertEmbeddings from Dinithi +author: John Snow Labs +name: newmodel +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `newmodel` is an English model originally trained by Dinithi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/newmodel_en_5.1.1_3.0_1694590192126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/newmodel_en_5.1.1_3.0_1694590192126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("newmodel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("newmodel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|newmodel| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/Dinithi/NewModel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-news_pretrain_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-news_pretrain_bert_en.md new file mode 100644 index 00000000000000..811a7e392add61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-news_pretrain_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English news_pretrain_bert BertEmbeddings from AnonymousSub +author: John Snow Labs +name: news_pretrain_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `news_pretrain_bert` is an English model originally trained by AnonymousSub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/news_pretrain_bert_en_5.1.1_3.0_1694625096290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/news_pretrain_bert_en_5.1.1_3.0_1694625096290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("news_pretrain_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("news_pretrain_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|news_pretrain_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AnonymousSub/news-pretrain-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nl1_en.md b/docs/_posts/ahmedlone127/2023-09-13-nl1_en.md new file mode 100644 index 00000000000000..7c685451ff7262 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nl1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nl1 BertEmbeddings from willemjan +author: John Snow Labs +name: nl1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nl1` is an English model originally trained by willemjan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nl1_en_5.1.1_3.0_1694584898456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nl1_en_5.1.1_3.0_1694584898456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nl1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nl1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nl1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/nl1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nl2_en.md b/docs/_posts/ahmedlone127/2023-09-13-nl2_en.md new file mode 100644 index 00000000000000..4d58ee322f8dd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nl2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nl2 BertEmbeddings from willemjan +author: John Snow Labs +name: nl2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nl2` is an English model originally trained by willemjan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nl2_en_5.1.1_3.0_1694585097782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nl2_en_5.1.1_3.0_1694585097782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nl2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nl2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nl2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/nl2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nlpload_en.md b/docs/_posts/ahmedlone127/2023-09-13-nlpload_en.md new file mode 100644 index 00000000000000..059972c587a9df --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nlpload_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nlpload BertEmbeddings from beiluo +author: John Snow Labs +name: nlpload +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nlpload` is an English model originally trained by beiluo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nlpload_en_5.1.1_3.0_1694586255849.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nlpload_en_5.1.1_3.0_1694586255849.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nlpload","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nlpload", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nlpload| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/beiluo/nlpload \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-notram_bert_norwegian_cased_080321_no.md b/docs/_posts/ahmedlone127/2023-09-13-notram_bert_norwegian_cased_080321_no.md new file mode 100644 index 00000000000000..b7ede65d1edf53 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-notram_bert_norwegian_cased_080321_no.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Norwegian notram_bert_norwegian_cased_080321 BertEmbeddings from NbAiLab +author: John Snow Labs +name: notram_bert_norwegian_cased_080321 +date: 2023-09-13 +tags: [bert, "no", open_source, fill_mask, onnx] +task: Embeddings +language: "no" +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `notram_bert_norwegian_cased_080321` is a Norwegian model originally trained by NbAiLab. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/notram_bert_norwegian_cased_080321_no_5.1.1_3.0_1694569128176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/notram_bert_norwegian_cased_080321_no_5.1.1_3.0_1694569128176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("notram_bert_norwegian_cased_080321","no") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("notram_bert_norwegian_cased_080321", "no") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|notram_bert_norwegian_cased_080321| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|no| +|Size:|663.0 MB| + +## References + +https://huggingface.co/NbAiLab/notram-bert-norwegian-cased-080321 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nuclear_medicine_dabert_en.md b/docs/_posts/ahmedlone127/2023-09-13-nuclear_medicine_dabert_en.md new file mode 100644 index 00000000000000..a03e803d152e11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nuclear_medicine_dabert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nuclear_medicine_dabert BertEmbeddings from Zach88 +author: John Snow Labs +name: nuclear_medicine_dabert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nuclear_medicine_dabert` is an English model originally trained by Zach88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuclear_medicine_dabert_en_5.1.1_3.0_1694593201567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuclear_medicine_dabert_en_5.1.1_3.0_1694593201567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nuclear_medicine_dabert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nuclear_medicine_dabert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuclear_medicine_dabert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Zach88/nuclear_medicine_DABERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-nuclear_medicine_dabioclincialbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-nuclear_medicine_dabioclincialbert_en.md new file mode 100644 index 00000000000000..90c8d759c35bd2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-nuclear_medicine_dabioclincialbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nuclear_medicine_dabioclincialbert BertEmbeddings from Zach88 +author: John Snow Labs +name: nuclear_medicine_dabioclincialbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `nuclear_medicine_dabioclincialbert` is an English model originally trained by Zach88. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nuclear_medicine_dabioclincialbert_en_5.1.1_3.0_1694593343797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nuclear_medicine_dabioclincialbert_en_5.1.1_3.0_1694593343797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("nuclear_medicine_dabioclincialbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("nuclear_medicine_dabioclincialbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nuclear_medicine_dabioclincialbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.3 MB| + +## References + +https://huggingface.co/Zach88/nuclear_medicine_DAbioClincialBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-odia_bert_or.md b/docs/_posts/ahmedlone127/2023-09-13-odia_bert_or.md new file mode 100644 index 00000000000000..d68febbe294480 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-odia_bert_or.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Oriya (macrolanguage) odia_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: odia_bert +date: 2023-09-13 +tags: [bert, or, open_source, fill_mask, onnx] +task: Embeddings +language: or +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `odia_bert` is an Oriya (macrolanguage) model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/odia_bert_or_5.1.1_3.0_1694643840678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/odia_bert_or_5.1.1_3.0_1694643840678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("odia_bert","or") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("odia_bert", "or") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|odia_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|or| +|Size:|890.4 MB| + +## References + +https://huggingface.co/l3cube-pune/odia-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-olm_bert_base_uncased_oct_2022_en.md b/docs/_posts/ahmedlone127/2023-09-13-olm_bert_base_uncased_oct_2022_en.md new file mode 100644 index 00000000000000..b56ea2b2160904 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-olm_bert_base_uncased_oct_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English olm_bert_base_uncased_oct_2022 BertEmbeddings from Tristan +author: John Snow Labs +name: olm_bert_base_uncased_oct_2022 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `olm_bert_base_uncased_oct_2022` is an English model originally trained by Tristan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/olm_bert_base_uncased_oct_2022_en_5.1.1_3.0_1694626342322.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/olm_bert_base_uncased_oct_2022_en_5.1.1_3.0_1694626342322.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("olm_bert_base_uncased_oct_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("olm_bert_base_uncased_oct_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|olm_bert_base_uncased_oct_2022| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.6 MB| + +## References + +https://huggingface.co/Tristan/olm-bert-base-uncased-oct-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-op_bert_mathqa_en.md b/docs/_posts/ahmedlone127/2023-09-13-op_bert_mathqa_en.md new file mode 100644 index 00000000000000..707ccf8da66b92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-op_bert_mathqa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English op_bert_mathqa BertEmbeddings from Gxg +author: John Snow Labs +name: op_bert_mathqa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `op_bert_mathqa` is an English model originally trained by Gxg. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/op_bert_mathqa_en_5.1.1_3.0_1694589793352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/op_bert_mathqa_en_5.1.1_3.0_1694589793352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("op_bert_mathqa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("op_bert_mathqa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|op_bert_mathqa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Gxg/Op_Bert_MathQA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalbert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_cased_en.md new file mode 100644 index 00000000000000..63d40cbc3aaeaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalbert_cased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalbert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalbert_cased` is an English model originally trained by opticalmaterials. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalbert_cased_en_5.1.1_3.0_1694581051911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalbert_cased_en_5.1.1_3.0_1694581051911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("opticalbert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalbert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalbert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalbert_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalbert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_uncased_en.md new file mode 100644 index 00000000000000..89048fd4589948 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalbert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalbert_uncased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalbert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalbert_uncased` is an English model originally trained by opticalmaterials. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalbert_uncased_en_5.1.1_3.0_1694581416416.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalbert_uncased_en_5.1.1_3.0_1694581416416.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("opticalbert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalbert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalbert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalbert_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_cased_en.md new file mode 100644 index 00000000000000..a6bf328fba73bd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalpurebert_cased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalpurebert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalpurebert_cased` is an English model originally trained by opticalmaterials. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalpurebert_cased_en_5.1.1_3.0_1694581559636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalpurebert_cased_en_5.1.1_3.0_1694581559636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("opticalpurebert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalpurebert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalpurebert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalpurebert_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_uncased_en.md new file mode 100644 index 00000000000000..9f396c3edfa399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-opticalpurebert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English opticalpurebert_uncased BertEmbeddings from opticalmaterials +author: John Snow Labs +name: opticalpurebert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `opticalpurebert_uncased` is an English model originally trained by opticalmaterials. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/opticalpurebert_uncased_en_5.1.1_3.0_1694581713185.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/opticalpurebert_uncased_en_5.1.1_3.0_1694581713185.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("opticalpurebert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("opticalpurebert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|opticalpurebert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/opticalmaterials/opticalpurebert_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-output_bert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-output_bert_uncased_en.md new file mode 100644 index 00000000000000..3ddbaf885a7ca8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-output_bert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English output_bert_uncased BertEmbeddings from btk +author: John Snow Labs +name: output_bert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `output_bert_uncased` is an English model originally trained by btk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/output_bert_uncased_en_5.1.1_3.0_1694589596652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/output_bert_uncased_en_5.1.1_3.0_1694589596652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("output_bert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("output_bert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|output_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/btk/output_bert_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-output_miladfa7_en.md b/docs/_posts/ahmedlone127/2023-09-13-output_miladfa7_en.md new file mode 100644 index 00000000000000..7ebbc4d5b7b831 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-output_miladfa7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English output_miladfa7 BertEmbeddings from miladfa7 +author: John Snow Labs +name: output_miladfa7 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `output_miladfa7` is an English model originally trained by miladfa7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/output_miladfa7_en_5.1.1_3.0_1694569708274.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/output_miladfa7_en_5.1.1_3.0_1694569708274.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("output_miladfa7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("output_miladfa7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|output_miladfa7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.0 MB| + +## References + +https://huggingface.co/miladfa7/output \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pak_legal_bert_small_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-pak_legal_bert_small_uncased_en.md new file mode 100644 index 00000000000000..68b62e03dd9250 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pak_legal_bert_small_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pak_legal_bert_small_uncased BertEmbeddings from AISystems +author: John Snow Labs +name: pak_legal_bert_small_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pak_legal_bert_small_uncased` is an English model originally trained by AISystems. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pak_legal_bert_small_uncased_en_5.1.1_3.0_1694596392979.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pak_legal_bert_small_uncased_en_5.1.1_3.0_1694596392979.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pak_legal_bert_small_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pak_legal_bert_small_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pak_legal_bert_small_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|130.5 MB| + +## References + +https://huggingface.co/AISystems/PAK-LEGAL-BERT-small-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-paperly_imply_plus_mlm_3rd_time_en.md b/docs/_posts/ahmedlone127/2023-09-13-paperly_imply_plus_mlm_3rd_time_en.md new file mode 100644 index 00000000000000..0a22e61f319993 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-paperly_imply_plus_mlm_3rd_time_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English paperly_imply_plus_mlm_3rd_time BertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: paperly_imply_plus_mlm_3rd_time +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `paperly_imply_plus_mlm_3rd_time` is an English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paperly_imply_plus_mlm_3rd_time_en_5.1.1_3.0_1694586225464.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paperly_imply_plus_mlm_3rd_time_en_5.1.1_3.0_1694586225464.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("paperly_imply_plus_mlm_3rd_time","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("paperly_imply_plus_mlm_3rd_time", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|paperly_imply_plus_mlm_3rd_time| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/ashwathjadhav23/paperly_imply_plus_MLM_3rd_time \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-paraphraserplus_1epoch_en.md b/docs/_posts/ahmedlone127/2023-09-13-paraphraserplus_1epoch_en.md new file mode 100644 index 00000000000000..20335bd56f296d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-paraphraserplus_1epoch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English paraphraserplus_1epoch BertEmbeddings from sergiyvl +author: John Snow Labs +name: paraphraserplus_1epoch +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `paraphraserplus_1epoch` is an English model originally trained by sergiyvl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/paraphraserplus_1epoch_en_5.1.1_3.0_1694573041191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/paraphraserplus_1epoch_en_5.1.1_3.0_1694573041191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("paraphraserplus_1epoch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("paraphraserplus_1epoch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|paraphraserplus_1epoch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.4 MB| + +## References + +https://huggingface.co/sergiyvl/ParaPhraserPlus_1epoch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-parlamint_en.md b/docs/_posts/ahmedlone127/2023-09-13-parlamint_en.md new file mode 100644 index 00000000000000..65579e5335403e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-parlamint_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English parlamint BertEmbeddings from IneG +author: John Snow Labs +name: parlamint +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `parlamint` is an English model originally trained by IneG. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parlamint_en_5.1.1_3.0_1694584898291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parlamint_en_5.1.1_3.0_1694584898291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parlamint","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parlamint", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parlamint| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/IneG/parlamint \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-parlbert_german_v1_de.md b/docs/_posts/ahmedlone127/2023-09-13-parlbert_german_v1_de.md new file mode 100644 index 00000000000000..6ae3abde88faa7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-parlbert_german_v1_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German parlbert_german_v1 BertEmbeddings from chkla +author: John Snow Labs +name: parlbert_german_v1 +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`parlbert_german_v1` is a German model originally trained by chkla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parlbert_german_v1_de_5.1.1_3.0_1694648419060.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parlbert_german_v1_de_5.1.1_3.0_1694648419060.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parlbert_german_v1","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parlbert_german_v1", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parlbert_german_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.8 MB| + +## References + +https://huggingface.co/chkla/parlbert-german-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-parlbert_german_v2_de.md b/docs/_posts/ahmedlone127/2023-09-13-parlbert_german_v2_de.md new file mode 100644 index 00000000000000..bf78662f3e709b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-parlbert_german_v2_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German parlbert_german_v2 BertEmbeddings from chkla +author: John Snow Labs +name: parlbert_german_v2 +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`parlbert_german_v2` is a German model originally trained by chkla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parlbert_german_v2_de_5.1.1_3.0_1694595611001.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parlbert_german_v2_de_5.1.1_3.0_1694595611001.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parlbert_german_v2","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parlbert_german_v2", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parlbert_german_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|409.7 MB| + +## References + +https://huggingface.co/chkla/parlbert-german-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-parsbert_base_sanay_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-parsbert_base_sanay_uncased_en.md new file mode 100644 index 00000000000000..ebf072adbafbf1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-parsbert_base_sanay_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English parsbert_base_sanay_uncased BertEmbeddings from miladfa7 +author: John Snow Labs +name: parsbert_base_sanay_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `parsbert_base_sanay_uncased` is an English model originally trained by miladfa7. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parsbert_base_sanay_uncased_en_5.1.1_3.0_1694569942308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parsbert_base_sanay_uncased_en_5.1.1_3.0_1694569942308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parsbert_base_sanay_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parsbert_base_sanay_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parsbert_base_sanay_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|608.9 MB| + +## References + +https://huggingface.co/miladfa7/parsbert-base-sanay-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pashto_bert_c_en.md b/docs/_posts/ahmedlone127/2023-09-13-pashto_bert_c_en.md new file mode 100644 index 00000000000000..4370e423b498b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pashto_bert_c_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pashto_bert_c BertEmbeddings from ijazulhaq +author: John Snow Labs +name: pashto_bert_c +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pashto_bert_c` is an English model originally trained by ijazulhaq. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pashto_bert_c_en_5.1.1_3.0_1694629099684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pashto_bert_c_en_5.1.1_3.0_1694629099684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pashto_bert_c","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pashto_bert_c", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pashto_bert_c| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/ijazulhaq/pashto-bert-c \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-patana_chilean_spanish_bert_es.md b/docs/_posts/ahmedlone127/2023-09-13-patana_chilean_spanish_bert_es.md new file mode 100644 index 00000000000000..d244e1f29106cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-patana_chilean_spanish_bert_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish patana_chilean_spanish_bert BertEmbeddings from dccuchile +author: John Snow Labs +name: patana_chilean_spanish_bert +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`patana_chilean_spanish_bert` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/patana_chilean_spanish_bert_es_5.1.1_3.0_1694619206082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/patana_chilean_spanish_bert_es_5.1.1_3.0_1694619206082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("patana_chilean_spanish_bert","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("patana_chilean_spanish_bert", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|patana_chilean_spanish_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.4 MB| + +## References + +https://huggingface.co/dccuchile/patana-chilean-spanish-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pcscibert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-pcscibert_cased_en.md new file mode 100644 index 00000000000000..607352ec2d16c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pcscibert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pcscibert_cased BertEmbeddings from jmzk96 +author: John Snow Labs +name: pcscibert_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pcscibert_cased` is an English model originally trained by jmzk96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pcscibert_cased_en_5.1.1_3.0_1694586987079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pcscibert_cased_en_5.1.1_3.0_1694586987079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pcscibert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pcscibert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pcscibert_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/jmzk96/PCSciBERT_cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pcscibert_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-pcscibert_uncased_en.md new file mode 100644 index 00000000000000..95a8bc32aa1ced --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pcscibert_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pcscibert_uncased BertEmbeddings from jmzk96 +author: John Snow Labs +name: pcscibert_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pcscibert_uncased` is an English model originally trained by jmzk96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pcscibert_uncased_en_5.1.1_3.0_1694592569814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pcscibert_uncased_en_5.1.1_3.0_1694592569814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pcscibert_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pcscibert_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pcscibert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/jmzk96/PCSciBERT_uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-phrase_bert_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-13-phrase_bert_finetuned_imdb_en.md new file mode 100644 index 00000000000000..853995c9977ccb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-phrase_bert_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English phrase_bert_finetuned_imdb BertEmbeddings from Sarmila +author: John Snow Labs +name: phrase_bert_finetuned_imdb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `phrase_bert_finetuned_imdb` is an English model originally trained by Sarmila. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phrase_bert_finetuned_imdb_en_5.1.1_3.0_1694641064662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phrase_bert_finetuned_imdb_en_5.1.1_3.0_1694641064662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("phrase_bert_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("phrase_bert_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phrase_bert_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Sarmila/phrase-bert-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-phs_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-phs_bert_en.md new file mode 100644 index 00000000000000..55e139407e13dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-phs_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English phs_bert BertEmbeddings from publichealthsurveillance +author: John Snow Labs +name: phs_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `phs_bert` is an English model originally trained by publichealthsurveillance. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/phs_bert_en_5.1.1_3.0_1694631316270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/phs_bert_en_5.1.1_3.0_1694631316270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("phs_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("phs_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|phs_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/publichealthsurveillance/PHS-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pretrained_kyw_e1_en.md b/docs/_posts/ahmedlone127/2023-09-13-pretrained_kyw_e1_en.md new file mode 100644 index 00000000000000..dd4ad5670e1bee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pretrained_kyw_e1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pretrained_kyw_e1 BertEmbeddings from shahriargolchin +author: John Snow Labs +name: pretrained_kyw_e1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pretrained_kyw_e1` is an English model originally trained by shahriargolchin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pretrained_kyw_e1_en_5.1.1_3.0_1694646138393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pretrained_kyw_e1_en_5.1.1_3.0_1694646138393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pretrained_kyw_e1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pretrained_kyw_e1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pretrained_kyw_e1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/shahriargolchin/pretrained_kyw_e1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-prompt_ls_english_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-prompt_ls_english_2_en.md new file mode 100644 index 00000000000000..d807d4577d1641 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-prompt_ls_english_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English prompt_ls_english_2 BertEmbeddings from lmvasque +author: John Snow Labs +name: prompt_ls_english_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prompt_ls_english_2` is an English model originally trained by lmvasque. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prompt_ls_english_2_en_5.1.1_3.0_1694625527257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prompt_ls_english_2_en_5.1.1_3.0_1694625527257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("prompt_ls_english_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("prompt_ls_english_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prompt_ls_english_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|625.5 MB| + +## References + +https://huggingface.co/lmvasque/prompt-ls-en-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_banking77_en.md b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_banking77_en.md new file mode 100644 index 00000000000000..0bf59c249e99db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_banking77_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English protaugment_lm_banking77 BertEmbeddings from tdopierre +author: John Snow Labs +name: protaugment_lm_banking77 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `protaugment_lm_banking77` is an English model originally trained by tdopierre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/protaugment_lm_banking77_en_5.1.1_3.0_1694576921524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/protaugment_lm_banking77_en_5.1.1_3.0_1694576921524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("protaugment_lm_banking77","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("protaugment_lm_banking77", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|protaugment_lm_banking77| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/tdopierre/ProtAugment-LM-BANKING77 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_clinic150_en.md b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_clinic150_en.md new file mode 100644 index 00000000000000..8be2edd33ee6d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_clinic150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English protaugment_lm_clinic150 BertEmbeddings from tdopierre +author: John Snow Labs +name: protaugment_lm_clinic150 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `protaugment_lm_clinic150` is an English model originally trained by tdopierre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/protaugment_lm_clinic150_en_5.1.1_3.0_1694577083272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/protaugment_lm_clinic150_en_5.1.1_3.0_1694577083272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("protaugment_lm_clinic150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("protaugment_lm_clinic150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|protaugment_lm_clinic150| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/tdopierre/ProtAugment-LM-Clinic150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_hwu64_en.md b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_hwu64_en.md new file mode 100644 index 00000000000000..19fb1698d6cdef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_hwu64_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English protaugment_lm_hwu64 BertEmbeddings from tdopierre +author: John Snow Labs +name: protaugment_lm_hwu64 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `protaugment_lm_hwu64` is an English model originally trained by tdopierre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/protaugment_lm_hwu64_en_5.1.1_3.0_1694577269187.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/protaugment_lm_hwu64_en_5.1.1_3.0_1694577269187.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("protaugment_lm_hwu64","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("protaugment_lm_hwu64", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|protaugment_lm_hwu64| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/tdopierre/ProtAugment-LM-HWU64 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_liu_en.md b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_liu_en.md new file mode 100644 index 00000000000000..9fef29f88b1833 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-protaugment_lm_liu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English protaugment_lm_liu BertEmbeddings from tdopierre +author: John Snow Labs +name: protaugment_lm_liu +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `protaugment_lm_liu` is an English model originally trained by tdopierre. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/protaugment_lm_liu_en_5.1.1_3.0_1694577437431.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/protaugment_lm_liu_en_5.1.1_3.0_1694577437431.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("protaugment_lm_liu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("protaugment_lm_liu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|protaugment_lm_liu| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.4 MB| + +## References + +https://huggingface.co/tdopierre/ProtAugment-LM-Liu \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-prunedbert_l12_h256_a4_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-prunedbert_l12_h256_a4_finetuned_en.md new file mode 100644 index 00000000000000..7841fe1b778e1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-prunedbert_l12_h256_a4_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English prunedbert_l12_h256_a4_finetuned BertEmbeddings from eli4s +author: John Snow Labs +name: prunedbert_l12_h256_a4_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prunedbert_l12_h256_a4_finetuned` is an English model originally trained by eli4s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prunedbert_l12_h256_a4_finetuned_en_5.1.1_3.0_1694630185594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prunedbert_l12_h256_a4_finetuned_en_5.1.1_3.0_1694630185594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("prunedbert_l12_h256_a4_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("prunedbert_l12_h256_a4_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prunedbert_l12_h256_a4_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|111.9 MB| + +## References + +https://huggingface.co/eli4s/prunedBert-L12-h256-A4-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-prunedbert_l12_h384_a6_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-prunedbert_l12_h384_a6_finetuned_en.md new file mode 100644 index 00000000000000..3c9423988f0cf1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-prunedbert_l12_h384_a6_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English prunedbert_l12_h384_a6_finetuned BertEmbeddings from eli4s +author: John Snow Labs +name: prunedbert_l12_h384_a6_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `prunedbert_l12_h384_a6_finetuned` is an English model originally trained by eli4s. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/prunedbert_l12_h384_a6_finetuned_en_5.1.1_3.0_1694630539022.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/prunedbert_l12_h384_a6_finetuned_en_5.1.1_3.0_1694630539022.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("prunedbert_l12_h384_a6_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("prunedbert_l12_h384_a6_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|prunedbert_l12_h384_a6_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|176.4 MB| + +## References + +https://huggingface.co/eli4s/prunedBert-L12-h384-A6-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_en.md new file mode 100644 index 00000000000000..1dfebc410806ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pubmedbert_abstract_cord19 BertEmbeddings from pritamdeka +author: John Snow Labs +name: pubmedbert_abstract_cord19 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pubmedbert_abstract_cord19` is an English model originally trained by pritamdeka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_en_5.1.1_3.0_1694564002029.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_en_5.1.1_3.0_1694564002029.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pubmedbert_abstract_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pubmedbert_abstract_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_abstract_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/pritamdeka/PubMedBert-abstract-cord19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_v2_en.md b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_v2_en.md new file mode 100644 index 00000000000000..a5c4d15b137480 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_abstract_cord19_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pubmedbert_abstract_cord19_v2 BertEmbeddings from pritamdeka +author: John Snow Labs +name: pubmedbert_abstract_cord19_v2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pubmedbert_abstract_cord19_v2` is an English model originally trained by pritamdeka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_v2_en_5.1.1_3.0_1694563849710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_abstract_cord19_v2_en_5.1.1_3.0_1694563849710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pubmedbert_abstract_cord19_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pubmedbert_abstract_cord19_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_abstract_cord19_v2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/pritamdeka/PubMedBert-abstract-cord19-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_fulltext_cord19_en.md b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_fulltext_cord19_en.md new file mode 100644 index 00000000000000..3b7bc95189a5b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pubmedbert_fulltext_cord19_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pubmedbert_fulltext_cord19 BertEmbeddings from pritamdeka +author: John Snow Labs +name: pubmedbert_fulltext_cord19 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pubmedbert_fulltext_cord19` is an English model originally trained by pritamdeka. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pubmedbert_fulltext_cord19_en_5.1.1_3.0_1694564166623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pubmedbert_fulltext_cord19_en_5.1.1_3.0_1694564166623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pubmedbert_fulltext_cord19","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pubmedbert_fulltext_cord19", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pubmedbert_fulltext_cord19| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.1 MB| + +## References + +https://huggingface.co/pritamdeka/PubMedBert-fulltext-cord19 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-punjabi_bert_pa.md b/docs/_posts/ahmedlone127/2023-09-13-punjabi_bert_pa.md new file mode 100644 index 00000000000000..95cba25b441a16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-punjabi_bert_pa.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Panjabi, Punjabi punjabi_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: punjabi_bert +date: 2023-09-13 +tags: [bert, pa, open_source, fill_mask, onnx] +task: Embeddings +language: pa +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `punjabi_bert` is a Panjabi, Punjabi model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/punjabi_bert_pa_5.1.1_3.0_1694645329932.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/punjabi_bert_pa_5.1.1_3.0_1694645329932.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("punjabi_bert","pa") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("punjabi_bert", "pa") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|punjabi_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pa| +|Size:|890.5 MB| + +## References + +https://huggingface.co/l3cube-pune/punjabi-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-pz_bert_kanuri_en.md b/docs/_posts/ahmedlone127/2023-09-13-pz_bert_kanuri_en.md new file mode 100644 index 00000000000000..0f41891579f9cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-pz_bert_kanuri_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pz_bert_kanuri BertEmbeddings from Hanwoon +author: John Snow Labs +name: pz_bert_kanuri +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pz_bert_kanuri` is an English model originally trained by Hanwoon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pz_bert_kanuri_en_5.1.1_3.0_1694626706084.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pz_bert_kanuri_en_5.1.1_3.0_1694626706084.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("pz_bert_kanuri","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("pz_bert_kanuri", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pz_bert_kanuri| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|441.1 MB| + +## References + +https://huggingface.co/Hanwoon/pz-bert-kr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qaideptmodel_en.md b/docs/_posts/ahmedlone127/2023-09-13-qaideptmodel_en.md new file mode 100644 index 00000000000000..91ba2dea30bd77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qaideptmodel_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English qaideptmodel BertEmbeddings from Razan +author: John Snow Labs +name: qaideptmodel +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qaideptmodel` is an English model originally trained by Razan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qaideptmodel_en_5.1.1_3.0_1694569996396.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qaideptmodel_en_5.1.1_3.0_1694569996396.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("qaideptmodel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("qaideptmodel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qaideptmodel| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.8 MB| + +## References + +https://huggingface.co/Razan/QAIDeptModel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qe3_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qe3_ar.md new file mode 100644 index 00000000000000..7fbf063d360c7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qe3_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qe3 BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qe3 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qe3` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qe3_ar_5.1.1_3.0_1694580295901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qe3_ar_5.1.1_3.0_1694580295901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("qe3","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("qe3", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qe3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.1 MB| + +## References + +https://huggingface.co/NLP-EXP/QE3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qe6_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qe6_ar.md new file mode 100644 index 00000000000000..b92c874a716461 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qe6_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qe6 BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qe6 +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qe6` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qe6_ar_5.1.1_3.0_1694580457879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qe6_ar_5.1.1_3.0_1694580457879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("qe6","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("qe6", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qe6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.2 MB| + +## References + +https://huggingface.co/NLP-EXP/QE6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qse_en.md b/docs/_posts/ahmedlone127/2023-09-13-qse_en.md new file mode 100644 index 00000000000000..ac1c35f3a24fce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qse_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English qse BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qse +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qse` is an English model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qse_en_5.1.1_3.0_1694582574251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qse_en_5.1.1_3.0_1694582574251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("qse","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("qse", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qse| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.0 MB| + +## References + +https://huggingface.co/NLP-EXP/QSE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qsr_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qsr_ar.md new file mode 100644 index 00000000000000..18104a5f339a1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qsr_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qsr BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qsr +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qsr` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qsr_ar_5.1.1_3.0_1694580143110.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qsr_ar_5.1.1_3.0_1694580143110.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("qsr","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("qsr", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qsr| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.0 MB| + +## References + +https://huggingface.co/NLP-EXP/QSR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qsrt_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qsrt_ar.md new file mode 100644 index 00000000000000..80bf5b476a74de --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qsrt_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qsrt BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qsrt +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qsrt` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qsrt_ar_5.1.1_3.0_1694579804623.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qsrt_ar_5.1.1_3.0_1694579804623.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("qsrt","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("qsrt", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qsrt| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.2 MB| + +## References + +https://huggingface.co/NLP-EXP/QSRT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-qst_ar.md b/docs/_posts/ahmedlone127/2023-09-13-qst_ar.md new file mode 100644 index 00000000000000..34eb8c95c873e7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-qst_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic qst BertEmbeddings from NLP-EXP +author: John Snow Labs +name: qst +date: 2023-09-13 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `qst` is an Arabic model originally trained by NLP-EXP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/qst_ar_5.1.1_3.0_1694579959539.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/qst_ar_5.1.1_3.0_1694579959539.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("qst","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("qst", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|qst| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|504.0 MB| + +## References + +https://huggingface.co/NLP-EXP/QST \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-quant_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-quant_bert_en.md new file mode 100644 index 00000000000000..33fd18fdd3cacb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-quant_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English quant_bert BertEmbeddings from dani0f +author: John Snow Labs +name: quant_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `quant_bert` is an English model originally trained by dani0f. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/quant_bert_en_5.1.1_3.0_1694580896756.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/quant_bert_en_5.1.1_3.0_1694580896756.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("quant_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("quant_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|quant_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/dani0f/quant_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-radbert_2m_en.md b/docs/_posts/ahmedlone127/2023-09-13-radbert_2m_en.md new file mode 100644 index 00000000000000..c05848d4764dee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-radbert_2m_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English radbert_2m BertEmbeddings from UCSD-VA-health +author: John Snow Labs +name: radbert_2m +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `radbert_2m` is an English model originally trained by UCSD-VA-health. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/radbert_2m_en_5.1.1_3.0_1694571151694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/radbert_2m_en_5.1.1_3.0_1694571151694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("radbert_2m","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("radbert_2m", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|radbert_2m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/UCSD-VA-health/RadBERT-2m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-recipe_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-13-recipe_bert_base_uncased_en.md new file mode 100644 index 00000000000000..e3eae7db825d7a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-recipe_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English recipe_bert_base_uncased BertEmbeddings from AnonymousSub +author: John Snow Labs +name: recipe_bert_base_uncased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `recipe_bert_base_uncased` is an English model originally trained by AnonymousSub. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/recipe_bert_base_uncased_en_5.1.1_3.0_1694577388934.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/recipe_bert_base_uncased_en_5.1.1_3.0_1694577388934.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("recipe_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("recipe_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|recipe_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/AnonymousSub/recipe-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text2_en.md b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text2_en.md new file mode 100644 index 00000000000000..9284efa835afb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English reddit_bert_text2 BertEmbeddings from flboehm +author: John Snow Labs +name: reddit_bert_text2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `reddit_bert_text2` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reddit_bert_text2_en_5.1.1_3.0_1694643257404.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reddit_bert_text2_en_5.1.1_3.0_1694643257404.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("reddit_bert_text2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("reddit_bert_text2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reddit_bert_text2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|414.4 MB| + +## References + +https://huggingface.co/flboehm/reddit-bert-text2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text3_en.md b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text3_en.md new file mode 100644 index 00000000000000..69533f6cfa486e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English reddit_bert_text3 BertEmbeddings from flboehm +author: John Snow Labs +name: reddit_bert_text3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `reddit_bert_text3` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reddit_bert_text3_en_5.1.1_3.0_1694643633284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reddit_bert_text3_en_5.1.1_3.0_1694643633284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("reddit_bert_text3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("reddit_bert_text3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reddit_bert_text3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|415.0 MB| + +## References + +https://huggingface.co/flboehm/reddit-bert-text3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text4_en.md b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text4_en.md new file mode 100644 index 00000000000000..cef44591552f48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English reddit_bert_text4 BertEmbeddings from flboehm +author: John Snow Labs +name: reddit_bert_text4 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `reddit_bert_text4` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reddit_bert_text4_en_5.1.1_3.0_1694644221338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reddit_bert_text4_en_5.1.1_3.0_1694644221338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("reddit_bert_text4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("reddit_bert_text4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reddit_bert_text4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/flboehm/reddit-bert-text4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_10_en.md new file mode 100644 index 00000000000000..7b568bda23693f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English reddit_bert_text_10 BertEmbeddings from flboehm +author: John Snow Labs +name: reddit_bert_text_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `reddit_bert_text_10` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reddit_bert_text_10_en_5.1.1_3.0_1694644674014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reddit_bert_text_10_en_5.1.1_3.0_1694644674014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# Wrap the raw "text" column as document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# BertEmbeddings also consumes "token" annotations, so tokenize first. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = BertEmbeddings.pretrained("reddit_bert_text_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +pipelineModel = pipeline.fit(data) +pipelineDF = pipelineModel.transform(data) +``` +```scala +// Wrap the raw "text" column as document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// BertEmbeddings also consumes "token" annotations, so tokenize first. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = BertEmbeddings + .pretrained("reddit_bert_text_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +val pipelineModel = pipeline.fit(data) +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reddit_bert_text_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/flboehm/reddit-bert-text_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_20_en.md new file mode 100644 index 00000000000000..4fb483f66bca0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English reddit_bert_text_20 BertEmbeddings from flboehm +author: John Snow Labs +name: reddit_bert_text_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `reddit_bert_text_20` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reddit_bert_text_20_en_5.1.1_3.0_1694645187415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reddit_bert_text_20_en_5.1.1_3.0_1694645187415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("reddit_bert_text_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("reddit_bert_text_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reddit_bert_text_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/flboehm/reddit-bert-text_20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_5_en.md b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_5_en.md new file mode 100644 index 00000000000000..016c2e00b11dd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-reddit_bert_text_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English reddit_bert_text_5 BertEmbeddings from flboehm +author: John Snow Labs +name: reddit_bert_text_5 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `reddit_bert_text_5` is an English model originally trained by flboehm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/reddit_bert_text_5_en_5.1.1_3.0_1694645644655.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/reddit_bert_text_5_en_5.1.1_3.0_1694645644655.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("reddit_bert_text_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("reddit_bert_text_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|reddit_bert_text_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.7 MB| + +## References + +https://huggingface.co/flboehm/reddit-bert-text_5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-retbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-retbert_en.md new file mode 100644 index 00000000000000..a04449fd7fe3b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-retbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retbert BertEmbeddings from ThePixOne +author: John Snow Labs +name: retbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retbert` is an English model originally trained by ThePixOne. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retbert_en_5.1.1_3.0_1694572305505.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retbert_en_5.1.1_3.0_1694572305505.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("retbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ThePixOne/retBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-retromae_beir_en.md b/docs/_posts/ahmedlone127/2023-09-13-retromae_beir_en.md new file mode 100644 index 00000000000000..5339be5c9bc0fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-retromae_beir_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retromae_beir BertEmbeddings from Shitao +author: John Snow Labs +name: retromae_beir +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retromae_beir` is an English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retromae_beir_en_5.1.1_3.0_1694594494255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retromae_beir_en_5.1.1_3.0_1694594494255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("retromae_beir","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retromae_beir", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retromae_beir| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_BEIR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-retromae_en.md b/docs/_posts/ahmedlone127/2023-09-13-retromae_en.md new file mode 100644 index 00000000000000..8da9f2925fb0d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-retromae_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retromae BertEmbeddings from Shitao +author: John Snow Labs +name: retromae +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retromae` is an English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retromae_en_5.1.1_3.0_1694594030983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retromae_en_5.1.1_3.0_1694594030983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("retromae","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retromae", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retromae| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-retromae_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-retromae_msmarco_en.md new file mode 100644 index 00000000000000..4b81a14b8c23ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-retromae_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retromae_msmarco BertEmbeddings from Shitao +author: John Snow Labs +name: retromae_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retromae_msmarco` is an English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retromae_msmarco_en_5.1.1_3.0_1694594205951.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retromae_msmarco_en_5.1.1_3.0_1694594205951.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("retromae_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retromae_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retromae_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_MSMARCO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-retromae_msmarco_finetune_en.md b/docs/_posts/ahmedlone127/2023-09-13-retromae_msmarco_finetune_en.md new file mode 100644 index 00000000000000..fe4dd6eefedd88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-retromae_msmarco_finetune_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retromae_msmarco_finetune BertEmbeddings from Shitao +author: John Snow Labs +name: retromae_msmarco_finetune +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retromae_msmarco_finetune` is an English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retromae_msmarco_finetune_en_5.1.1_3.0_1694594334803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retromae_msmarco_finetune_en_5.1.1_3.0_1694594334803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("retromae_msmarco_finetune","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retromae_msmarco_finetune", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retromae_msmarco_finetune| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_MSMARCO_finetune \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-roberta_base_culinary_en.md b/docs/_posts/ahmedlone127/2023-09-13-roberta_base_culinary_en.md new file mode 100644 index 00000000000000..102a06c15282b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-roberta_base_culinary_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English roberta_base_culinary BertEmbeddings from juancavallotti +author: John Snow Labs +name: roberta_base_culinary +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `roberta_base_culinary` is an English model originally trained by juancavallotti. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/roberta_base_culinary_en_5.1.1_3.0_1694642566778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/roberta_base_culinary_en_5.1.1_3.0_1694642566778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("roberta_base_culinary","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("roberta_base_culinary", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|roberta_base_culinary| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.5 MB| + +## References + +https://huggingface.co/juancavallotti/roberta-base-culinary \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_base_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_finetuned_en.md new file mode 100644 index 00000000000000..2355d04032b6e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English rubert_base_finetuned BertEmbeddings from rugo +author: John Snow Labs +name: rubert_base_finetuned +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_base_finetuned` is an English model originally trained by rugo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_base_finetuned_en_5.1.1_3.0_1694575565545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_base_finetuned_en_5.1.1_3.0_1694575565545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_base_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_base_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_base_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|667.0 MB| + +## References + +https://huggingface.co/rugo/ruBert-base-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_base_finetuned_russian_moshkov_child_corpus_pro_en.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_finetuned_russian_moshkov_child_corpus_pro_en.md new file mode 100644 index 00000000000000..03340c4ee972e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_finetuned_russian_moshkov_child_corpus_pro_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English rubert_base_finetuned_russian_moshkov_child_corpus_pro BertEmbeddings from a-v-bely +author: John Snow Labs +name: rubert_base_finetuned_russian_moshkov_child_corpus_pro +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_base_finetuned_russian_moshkov_child_corpus_pro` is an English model originally trained by a-v-bely.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_base_finetuned_russian_moshkov_child_corpus_pro_en_5.1.1_3.0_1694593004356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_base_finetuned_russian_moshkov_child_corpus_pro_en_5.1.1_3.0_1694593004356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_base_finetuned_russian_moshkov_child_corpus_pro","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_base_finetuned_russian_moshkov_child_corpus_pro", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_base_finetuned_russian_moshkov_child_corpus_pro| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|666.9 MB| + +## References + +https://huggingface.co/a-v-bely/ruBert-base-finetuned-russian-moshkov-child-corpus-pro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_base_ru.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_ru.md new file mode 100644 index 00000000000000..890f7c7e49be59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian rubert_base BertEmbeddings from ai-forever +author: John Snow Labs +name: rubert_base +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_base` is a Russian model originally trained by ai-forever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_base_ru_5.1.1_3.0_1694571215568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_base_ru_5.1.1_3.0_1694571215568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_base","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_base", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|667.0 MB| + +## References + +https://huggingface.co/ai-forever/ruBert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_base_vet_en.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_vet_en.md new file mode 100644 index 00000000000000..a118d7dacbd2a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_base_vet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English rubert_base_vet BertEmbeddings from erasedwalt +author: John Snow Labs +name: rubert_base_vet +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_base_vet` is an English model originally trained by erasedwalt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_base_vet_en_5.1.1_3.0_1694632381694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_base_vet_en_5.1.1_3.0_1694632381694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_base_vet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_base_vet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_base_vet| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|663.8 MB| + +## References + +https://huggingface.co/erasedwalt/rubert-base-vet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_large_ru.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_large_ru.md new file mode 100644 index 00000000000000..4215dc9aff15ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_large_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian rubert_large BertEmbeddings from ai-forever +author: John Snow Labs +name: rubert_large +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_large` is a Russian model originally trained by ai-forever. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_large_ru_5.1.1_3.0_1694571586736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_large_ru_5.1.1_3.0_1694571586736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_large","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_large", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|1.6 GB| + +## References + +https://huggingface.co/ai-forever/ruBert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_tiny2_ru.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_tiny2_ru.md new file mode 100644 index 00000000000000..aed9ae75b7de5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_tiny2_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian rubert_tiny2 BertEmbeddings from cointegrated +author: John Snow Labs +name: rubert_tiny2 +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_tiny2` is a Russian model originally trained by cointegrated. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_tiny2_ru_5.1.1_3.0_1694593810298.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_tiny2_ru_5.1.1_3.0_1694593810298.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_tiny2","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_tiny2", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_tiny2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|109.1 MB| + +## References + +https://huggingface.co/cointegrated/rubert-tiny2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubert_tiny_ru.md b/docs/_posts/ahmedlone127/2023-09-13-rubert_tiny_ru.md new file mode 100644 index 00000000000000..ec331b08510361 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubert_tiny_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian rubert_tiny BertEmbeddings from cointegrated +author: John Snow Labs +name: rubert_tiny +date: 2023-09-13 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubert_tiny` is a Russian model originally trained by cointegrated. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubert_tiny_ru_5.1.1_3.0_1694593728672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubert_tiny_ru_5.1.1_3.0_1694593728672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubert_tiny","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubert_tiny", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubert_tiny| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|43.8 MB| + +## References + +https://huggingface.co/cointegrated/rubert-tiny \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-rubiobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-rubiobert_en.md new file mode 100644 index 00000000000000..709b81962d538b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-rubiobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English rubiobert BertEmbeddings from alexyalunin +author: John Snow Labs +name: rubiobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `rubiobert` is an English model originally trained by alexyalunin. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/rubiobert_en_5.1.1_3.0_1694578324460.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/rubiobert_en_5.1.1_3.0_1694578324460.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("rubiobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("rubiobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|rubiobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|667.1 MB| + +## References + +https://huggingface.co/alexyalunin/RuBioBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sanay_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-sanay_bert_en.md new file mode 100644 index 00000000000000..cc972bad977218 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sanay_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sanay_bert BertEmbeddings from miladfa7 +author: John Snow Labs +name: sanay_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sanay_bert` is an English model originally trained by miladfa7. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sanay_bert_en_5.1.1_3.0_1694570248333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sanay_bert_en_5.1.1_3.0_1694570248333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sanay_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sanay_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sanay_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|602.3 MB| + +## References + +https://huggingface.co/miladfa7/sanay-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sanaybert_model_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-sanaybert_model_v1_en.md new file mode 100644 index 00000000000000..adf2f14f361d8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sanaybert_model_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sanaybert_model_v1 BertEmbeddings from miladfa7 +author: John Snow Labs +name: sanaybert_model_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sanaybert_model_v1` is an English model originally trained by miladfa7. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sanaybert_model_v1_en_5.1.1_3.0_1694570593906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sanaybert_model_v1_en_5.1.1_3.0_1694570593906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sanaybert_model_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sanaybert_model_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sanaybert_model_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.3 MB| + +## References + +https://huggingface.co/miladfa7/sanayBERT_model_V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sci_summary_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-sci_summary_2_en.md new file mode 100644 index 00000000000000..a474d3f7900764 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sci_summary_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sci_summary_2 BertEmbeddings from PrathameshPawar +author: John Snow Labs +name: sci_summary_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sci_summary_2` is an English model originally trained by PrathameshPawar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sci_summary_2_en_5.1.1_3.0_1694605799787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sci_summary_2_en_5.1.1_3.0_1694605799787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sci_summary_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sci_summary_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sci_summary_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/PrathameshPawar/sci_summary_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sci_summary_3_en.md b/docs/_posts/ahmedlone127/2023-09-13-sci_summary_3_en.md new file mode 100644 index 00000000000000..2aff2c17e6f193 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sci_summary_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sci_summary_3 BertEmbeddings from PrathameshPawar +author: John Snow Labs +name: sci_summary_3 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sci_summary_3` is an English model originally trained by PrathameshPawar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sci_summary_3_en_5.1.1_3.0_1694610884788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sci_summary_3_en_5.1.1_3.0_1694610884788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sci_summary_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sci_summary_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sci_summary_3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.7 MB| + +## References + +https://huggingface.co/PrathameshPawar/sci_summary_3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v1_finetuned_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v1_finetuned_20_en.md new file mode 100644 index 00000000000000..c5e0519e83ef15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v1_finetuned_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_lm_v1_finetuned_20 BertEmbeddings from ariesutiono +author: John Snow Labs +name: scibert_lm_v1_finetuned_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_lm_v1_finetuned_20` is an English model originally trained by ariesutiono. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_lm_v1_finetuned_20_en_5.1.1_3.0_1694564841995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_lm_v1_finetuned_20_en_5.1.1_3.0_1694564841995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_lm_v1_finetuned_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_lm_v1_finetuned_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_lm_v1_finetuned_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/ariesutiono/scibert-lm-v1-finetuned-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v2_finetuned_20_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v2_finetuned_20_en.md new file mode 100644 index 00000000000000..ee7b79a92a3f4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_lm_v2_finetuned_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_lm_v2_finetuned_20 BertEmbeddings from ariesutiono +author: John Snow Labs +name: scibert_lm_v2_finetuned_20 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_lm_v2_finetuned_20` is an English model originally trained by ariesutiono. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_lm_v2_finetuned_20_en_5.1.1_3.0_1694565081199.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_lm_v2_finetuned_20_en_5.1.1_3.0_1694565081199.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_lm_v2_finetuned_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_lm_v2_finetuned_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_lm_v2_finetuned_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|499.5 MB| + +## References + +https://huggingface.co/ariesutiono/scibert-lm-v2-finetuned-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_cased_finetuned_scc_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_cased_finetuned_scc_en.md new file mode 100644 index 00000000000000..67559b1f94102b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_cased_finetuned_scc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_cased_finetuned_scc BertEmbeddings from phd2023 +author: John Snow Labs +name: scibert_scivocab_cased_finetuned_scc +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_cased_finetuned_scc` is an English model originally trained by phd2023. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_cased_finetuned_scc_en_5.1.1_3.0_1694589613830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_cased_finetuned_scc_en_5.1.1_3.0_1694589613830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_cased_finetuned_scc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_cased_finetuned_scc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_cased_finetuned_scc| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/phd2023/scibert_scivocab_cased-finetuned-scc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibero_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibero_en.md new file mode 100644 index 00000000000000..777c51d6e921d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibero_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_finetuned_scibero BertEmbeddings from Transabrar +author: John Snow Labs +name: scibert_scivocab_uncased_finetuned_scibero +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_finetuned_scibero` is an English model originally trained by Transabrar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibero_en_5.1.1_3.0_1694572669211.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibero_en_5.1.1_3.0_1694572669211.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_finetuned_scibero","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_finetuned_scibero", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_finetuned_scibero| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/Transabrar/scibert_scivocab_uncased-finetuned-scibero \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en.md new file mode 100644 index 00000000000000..4170c091360902 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_finetuned_scibert_agu_abstracts BertEmbeddings from arminmehrabian +author: John Snow Labs +name: scibert_scivocab_uncased_finetuned_scibert_agu_abstracts +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scibert_scivocab_uncased_finetuned_scibert_agu_abstracts` is an English model originally trained by arminmehrabian.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en_5.1.1_3.0_1694563751306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_finetuned_scibert_agu_abstracts_en_5.1.1_3.0_1694563751306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_finetuned_scibert_agu_abstracts","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_finetuned_scibert_agu_abstracts", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_finetuned_scibert_agu_abstracts| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/arminmehrabian/scibert_scivocab_uncased-finetuned-scibert-agu-abstracts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_long_4096_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_long_4096_en.md new file mode 100644 index 00000000000000..56b07713c1bea8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_long_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_long_4096 BertEmbeddings from yorko +author: John Snow Labs +name: scibert_scivocab_uncased_long_4096 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scibert_scivocab_uncased_long_4096` is a English model originally trained by yorko. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_long_4096_en_5.1.1_3.0_1694587393144.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_long_4096_en_5.1.1_3.0_1694587393144.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_long_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_long_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_long_4096| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|420.3 MB| + +## References + +https://huggingface.co/yorko/scibert_scivocab_uncased_long_4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_malayalam_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_malayalam_finetuned_imdb_en.md new file mode 100644 index 00000000000000..4f1772f77ae8bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_scivocab_uncased_malayalam_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_scivocab_uncased_malayalam_finetuned_imdb BertEmbeddings from Sarmila +author: John Snow Labs +name: scibert_scivocab_uncased_malayalam_finetuned_imdb +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scibert_scivocab_uncased_malayalam_finetuned_imdb` is a English model originally trained by Sarmila. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_malayalam_finetuned_imdb_en_5.1.1_3.0_1694594325370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_scivocab_uncased_malayalam_finetuned_imdb_en_5.1.1_3.0_1694594325370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_scivocab_uncased_malayalam_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_scivocab_uncased_malayalam_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_scivocab_uncased_malayalam_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/Sarmila/scibert_scivocab_uncased_ml-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scibert_wechsel_korean_en.md b/docs/_posts/ahmedlone127/2023-09-13-scibert_wechsel_korean_en.md new file mode 100644 index 00000000000000..3ee8667dc06922 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scibert_wechsel_korean_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scibert_wechsel_korean BertEmbeddings from LeverageX +author: John Snow Labs +name: scibert_wechsel_korean +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scibert_wechsel_korean` is a English model originally trained by LeverageX. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scibert_wechsel_korean_en_5.1.1_3.0_1694566166641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scibert_wechsel_korean_en_5.1.1_3.0_1694566166641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scibert_wechsel_korean","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scibert_wechsel_korean", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scibert_wechsel_korean| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/LeverageX/scibert-wechsel-korean \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-scinewsbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-scinewsbert_en.md new file mode 100644 index 00000000000000..9d56fd0209afb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-scinewsbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scinewsbert BertEmbeddings from psmeros +author: John Snow Labs +name: scinewsbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`scinewsbert` is a English model originally trained by psmeros. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scinewsbert_en_5.1.1_3.0_1694596298829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scinewsbert_en_5.1.1_3.0_1694596298829.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scinewsbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scinewsbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scinewsbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/psmeros/SciNewsBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-segabert_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-segabert_large_en.md new file mode 100644 index 00000000000000..d47852fa81c05c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-segabert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English segabert_large BertEmbeddings from rsvp-AI-ca +author: John Snow Labs +name: segabert_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`segabert_large` is a English model originally trained by rsvp-AI-ca. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/segabert_large_en_5.1.1_3.0_1694568994445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/segabert_large_en_5.1.1_3.0_1694568994445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("segabert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("segabert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|segabert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|941.1 MB| + +## References + +https://huggingface.co/rsvp-AI-ca/segabert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_beir_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_beir_en.md new file mode 100644 index 00000000000000..b797ac70350b25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_beir_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae_beir BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae_beir +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentmae_beir` is a English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_beir_en_5.1.1_3.0_1694567238490.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_beir_en_5.1.1_3.0_1694567238490.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae_beir","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae_beir", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae_beir| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE_BEIR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_en.md new file mode 100644 index 00000000000000..c71ac5be26922e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentmae` is a English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_en_5.1.1_3.0_1694567054783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_en_5.1.1_3.0_1694567054783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_domain_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_domain_en.md new file mode 100644 index 00000000000000..19be8159194bcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_domain_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae_msmarco_domain BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae_msmarco_domain +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentmae_msmarco_domain` is a English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_msmarco_domain_en_5.1.1_3.0_1694592225516.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_msmarco_domain_en_5.1.1_3.0_1694592225516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae_msmarco_domain","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae_msmarco_domain", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae_msmarco_domain| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE_MSMARCO_Domain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_en.md new file mode 100644 index 00000000000000..a26ae4ff2a2120 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sentmae_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sentmae_msmarco BertEmbeddings from SamuelYang +author: John Snow Labs +name: sentmae_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sentmae_msmarco` is a English model originally trained by SamuelYang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sentmae_msmarco_en_5.1.1_3.0_1694567599891.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sentmae_msmarco_en_5.1.1_3.0_1694567599891.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sentmae_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sentmae_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sentmae_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/SamuelYang/SentMAE_MSMARCO \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-shangpin_pre_training_en.md b/docs/_posts/ahmedlone127/2023-09-13-shangpin_pre_training_en.md new file mode 100644 index 00000000000000..26ed799f5dfb2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-shangpin_pre_training_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English shangpin_pre_training BertEmbeddings from nnn +author: John Snow Labs +name: shangpin_pre_training +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`shangpin_pre_training` is a English model originally trained by nnn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/shangpin_pre_training_en_5.1.1_3.0_1694643216154.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/shangpin_pre_training_en_5.1.1_3.0_1694643216154.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("shangpin_pre_training","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("shangpin_pre_training", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|shangpin_pre_training| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.6 MB| + +## References + +https://huggingface.co/nnn/shangpin-pre-training \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sikubert_zh.md b/docs/_posts/ahmedlone127/2023-09-13-sikubert_zh.md new file mode 100644 index 00000000000000..1f8bbc0e6e1c9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sikubert_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese sikubert BertEmbeddings from SIKU-BERT +author: John Snow Labs +name: sikubert +date: 2023-09-13 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sikubert` is a Chinese model originally trained by SIKU-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sikubert_zh_5.1.1_3.0_1694570261032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sikubert_zh_5.1.1_3.0_1694570261032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sikubert","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sikubert", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sikubert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|406.0 MB| + +## References + +https://huggingface.co/SIKU-BERT/sikubert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sikuroberta_zh.md b/docs/_posts/ahmedlone127/2023-09-13-sikuroberta_zh.md new file mode 100644 index 00000000000000..e2a866b6c6ab3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sikuroberta_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese sikuroberta BertEmbeddings from SIKU-BERT +author: John Snow Labs +name: sikuroberta +date: 2023-09-13 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sikuroberta` is a Chinese model originally trained by SIKU-BERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sikuroberta_zh_5.1.1_3.0_1694570428058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sikuroberta_zh_5.1.1_3.0_1694570428058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sikuroberta","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sikuroberta", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sikuroberta| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|405.9 MB| + +## References + +https://huggingface.co/SIKU-BERT/sikuroberta \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-simlm_base_msmarco_en.md b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_msmarco_en.md new file mode 100644 index 00000000000000..bd9a20c9fe4cd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_msmarco_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English simlm_base_msmarco BertEmbeddings from intfloat +author: John Snow Labs +name: simlm_base_msmarco +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `simlm_base_msmarco` is an English model originally trained by intfloat. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/simlm_base_msmarco_en_5.1.1_3.0_1694566597314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/simlm_base_msmarco_en_5.1.1_3.0_1694566597314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("simlm_base_msmarco","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("simlm_base_msmarco", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|simlm_base_msmarco| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/intfloat/simlm-base-msmarco \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-simlm_base_wiki100w_en.md b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_wiki100w_en.md new file mode 100644 index 00000000000000..5d653265f03a17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-simlm_base_wiki100w_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English simlm_base_wiki100w BertEmbeddings from intfloat +author: John Snow Labs +name: simlm_base_wiki100w +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `simlm_base_wiki100w` is an English model originally trained by intfloat. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/simlm_base_wiki100w_en_5.1.1_3.0_1694566769454.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/simlm_base_wiki100w_en_5.1.1_3.0_1694566769454.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("simlm_base_wiki100w","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("simlm_base_wiki100w", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|simlm_base_wiki100w| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/intfloat/simlm-base-wiki100w \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-slim_beir_scifact_old_en.md b/docs/_posts/ahmedlone127/2023-09-13-slim_beir_scifact_old_en.md new file mode 100644 index 00000000000000..994317be139b8b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-slim_beir_scifact_old_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English slim_beir_scifact_old BertEmbeddings from kinoo +author: John Snow Labs +name: slim_beir_scifact_old +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `slim_beir_scifact_old` is an English model originally trained by kinoo. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/slim_beir_scifact_old_en_5.1.1_3.0_1694578412848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/slim_beir_scifact_old_en_5.1.1_3.0_1694578412848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("slim_beir_scifact_old","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("slim_beir_scifact_old", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|slim_beir_scifact_old| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/kinoo/slim_beir-scifact-old \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-slimr_msmarco_passage_en.md b/docs/_posts/ahmedlone127/2023-09-13-slimr_msmarco_passage_en.md new file mode 100644 index 00000000000000..d8f59ad8aa8f32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-slimr_msmarco_passage_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English slimr_msmarco_passage BertEmbeddings from castorini +author: John Snow Labs +name: slimr_msmarco_passage +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `slimr_msmarco_passage` is an English model originally trained by castorini. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/slimr_msmarco_passage_en_5.1.1_3.0_1694577246538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/slimr_msmarco_passage_en_5.1.1_3.0_1694577246538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("slimr_msmarco_passage","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("slimr_msmarco_passage", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|slimr_msmarco_passage| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/castorini/slimr-msmarco-passage \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-slimr_pp_msmarco_passage_en.md b/docs/_posts/ahmedlone127/2023-09-13-slimr_pp_msmarco_passage_en.md new file mode 100644 index 00000000000000..1c3c4d707f7af6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-slimr_pp_msmarco_passage_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English slimr_pp_msmarco_passage BertEmbeddings from castorini +author: John Snow Labs +name: slimr_pp_msmarco_passage +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `slimr_pp_msmarco_passage` is an English model originally trained by castorini. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/slimr_pp_msmarco_passage_en_5.1.1_3.0_1694577420042.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/slimr_pp_msmarco_passage_en_5.1.1_3.0_1694577420042.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("slimr_pp_msmarco_passage","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("slimr_pp_msmarco_passage", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|slimr_pp_msmarco_passage| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/castorini/slimr-pp-msmarco-passage \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..9f23d6657fee5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563331949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563331949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|135.2 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..32cd6e16c56c25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563423736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563423736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qqp_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|122.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..0c2d846bfce26d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563524788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563524788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_qqp_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|122.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-qqp-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..69cca9ff8c53e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563639389.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563639389.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_rte_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|126.2 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..2bd82070b0129b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563734576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563734576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_rte_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|126.3 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-rte-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..5a93507c7457ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563829652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563829652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_glue_sst2_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..826dafefb44cac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563918718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563918718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_sst2_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|120.8 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-sst2-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..11d672c756d5fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564136292.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564136292.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_stsb_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|117.4 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..6eac2463e75411 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564017601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564017601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_stsb_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|117.4 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-stsb-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..d92b404f4f39b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564320620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564320620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_wnli_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|108.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..18c0b2d5019597 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564212872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694564212872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|108.1 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-glue-wnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spa_en.md b/docs/_posts/ahmedlone127/2023-09-13-spa_en.md new file mode 100644 index 00000000000000..709b72b2b9a23b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spa BertEmbeddings from willemjan +author: John Snow Labs +name: spa +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spa` is an English model originally trained by willemjan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spa_en_5.1.1_3.0_1694585317302.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spa_en_5.1.1_3.0_1694585317302.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spa| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/willemjan/spa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spanish_bert_base_spanish_wwm_cased_en.md b/docs/_posts/ahmedlone127/2023-09-13-spanish_bert_base_spanish_wwm_cased_en.md new file mode 100644 index 00000000000000..4bb20b5f13cff8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spanish_bert_base_spanish_wwm_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spanish_bert_base_spanish_wwm_cased BertEmbeddings from jorgeortizfuentes +author: John Snow Labs +name: spanish_bert_base_spanish_wwm_cased +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spanish_bert_base_spanish_wwm_cased` is an English model originally trained by jorgeortizfuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_bert_base_spanish_wwm_cased_en_5.1.1_3.0_1694605536375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_bert_base_spanish_wwm_cased_en_5.1.1_3.0_1694605536375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spanish_bert_base_spanish_wwm_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spanish_bert_base_spanish_wwm_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_bert_base_spanish_wwm_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/jorgeortizfuentes/spanish-bert-base-spanish-wwm-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spanish_hate_speech_en.md b/docs/_posts/ahmedlone127/2023-09-13-spanish_hate_speech_en.md new file mode 100644 index 00000000000000..e6a6a6dcba7de3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spanish_hate_speech_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spanish_hate_speech BertEmbeddings from jorgeortizfuentes +author: John Snow Labs +name: spanish_hate_speech +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spanish_hate_speech` is an English model originally trained by jorgeortizfuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_hate_speech_en_5.1.1_3.0_1694582414893.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_hate_speech_en_5.1.1_3.0_1694582414893.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spanish_hate_speech","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spanish_hate_speech", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_hate_speech| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.2 MB| + +## References + +https://huggingface.co/jorgeortizfuentes/spanish_hate_speech \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spanish_mlm_03_en.md b/docs/_posts/ahmedlone127/2023-09-13-spanish_mlm_03_en.md new file mode 100644 index 00000000000000..0ec8dd7175ef54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spanish_mlm_03_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spanish_mlm_03 BertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: spanish_mlm_03 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spanish_mlm_03` is an English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spanish_mlm_03_en_5.1.1_3.0_1694612397384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spanish_mlm_03_en_5.1.1_3.0_1694612397384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spanish_mlm_03","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spanish_mlm_03", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spanish_mlm_03| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/ashwathjadhav23/Spanish_MLM_03 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sparse_model_en.md b/docs/_posts/ahmedlone127/2023-09-13-sparse_model_en.md new file mode 100644 index 00000000000000..0b52241d4baeeb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sparse_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sparse_model BertEmbeddings from gubartz +author: John Snow Labs +name: sparse_model +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sparse_model` is an English model originally trained by gubartz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sparse_model_en_5.1.1_3.0_1694594774734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sparse_model_en_5.1.1_3.0_1694594774734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("sparse_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sparse_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sparse_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/gubartz/sparse_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_base_en.md new file mode 100644 index 00000000000000..dc85b7d7c9991e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spbert_mlm_base BertEmbeddings from razent +author: John Snow Labs +name: spbert_mlm_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spbert_mlm_base` is an English model originally trained by razent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spbert_mlm_base_en_5.1.1_3.0_1694566338968.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spbert_mlm_base_en_5.1.1_3.0_1694566338968.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("spbert_mlm_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spbert_mlm_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spbert_mlm_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/razent/spbert-mlm-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_wso_base_en.md b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_wso_base_en.md new file mode 100644 index 00000000000000..36b6d2733147e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-spbert_mlm_wso_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spbert_mlm_wso_base BertEmbeddings from razent +author: John Snow Labs +name: spbert_mlm_wso_base +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spbert_mlm_wso_base` is an English model originally trained by razent. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spbert_mlm_wso_base_en_5.1.1_3.0_1694566556828.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spbert_mlm_wso_base_en_5.1.1_3.0_1694566556828.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("spbert_mlm_wso_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("spbert_mlm_wso_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spbert_mlm_wso_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/razent/spbert-mlm-wso-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_all_bertnsp_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_all_bertnsp_220_en.md new file mode 100644 index 00000000000000..cb0f6a81171f89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_all_bertnsp_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_all_bertnsp_220 BertEmbeddings from approach0 +author: John Snow Labs +name: splade_all_bertnsp_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_all_bertnsp_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_all_bertnsp_220_en_5.1.1_3.0_1694634798952.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_all_bertnsp_220_en_5.1.1_3.0_1694634798952.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_all_bertnsp_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_all_bertnsp_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_all_bertnsp_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/splade_all-bertnsp-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_all_cocomae_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_all_cocomae_220_en.md new file mode 100644 index 00000000000000..9381130510091c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_all_cocomae_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_all_cocomae_220 BertEmbeddings from approach0 +author: John Snow Labs +name: splade_all_cocomae_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_all_cocomae_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_all_cocomae_220_en_5.1.1_3.0_1694633376860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_all_cocomae_220_en_5.1.1_3.0_1694633376860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_all_cocomae_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_all_cocomae_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_all_cocomae_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/splade_all-cocomae-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_cocondenser_selfdistil_baseplate_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_cocondenser_selfdistil_baseplate_en.md new file mode 100644 index 00000000000000..a6d5e2441fc700 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_cocondenser_selfdistil_baseplate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_cocondenser_selfdistil_baseplate BertEmbeddings from baseplate +author: John Snow Labs +name: splade_cocondenser_selfdistil_baseplate +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_cocondenser_selfdistil_baseplate` is an English model originally trained by baseplate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_cocondenser_selfdistil_baseplate_en_5.1.1_3.0_1694585076462.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_cocondenser_selfdistil_baseplate_en_5.1.1_3.0_1694585076462.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_cocondenser_selfdistil_baseplate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_cocondenser_selfdistil_baseplate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_cocondenser_selfdistil_baseplate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/baseplate/splade-cocondenser-selfdistil \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_nomath_bertnsp_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_nomath_bertnsp_220_en.md new file mode 100644 index 00000000000000..2bdc74d8db74e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_nomath_bertnsp_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_nomath_bertnsp_220 BertEmbeddings from approach0 +author: John Snow Labs +name: splade_nomath_bertnsp_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_nomath_bertnsp_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_nomath_bertnsp_220_en_5.1.1_3.0_1694635158811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_nomath_bertnsp_220_en_5.1.1_3.0_1694635158811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_nomath_bertnsp_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_nomath_bertnsp_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_nomath_bertnsp_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/splade_nomath-bertnsp-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_nomath_cocomae_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_nomath_cocomae_220_en.md new file mode 100644 index 00000000000000..d90f7a4b5f6236 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_nomath_cocomae_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_nomath_cocomae_220 BertEmbeddings from approach0 +author: John Snow Labs +name: splade_nomath_cocomae_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_nomath_cocomae_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_nomath_cocomae_220_en_5.1.1_3.0_1694634002866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_nomath_cocomae_220_en_5.1.1_3.0_1694634002866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_nomath_cocomae_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_nomath_cocomae_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_nomath_cocomae_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/splade_nomath-cocomae-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_somemath_bertnsp_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_somemath_bertnsp_220_en.md new file mode 100644 index 00000000000000..263c43c9c0f988 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_somemath_bertnsp_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_somemath_bertnsp_220 BertEmbeddings from approach0 +author: John Snow Labs +name: splade_somemath_bertnsp_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_somemath_bertnsp_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_somemath_bertnsp_220_en_5.1.1_3.0_1694635770562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_somemath_bertnsp_220_en_5.1.1_3.0_1694635770562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_somemath_bertnsp_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_somemath_bertnsp_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_somemath_bertnsp_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.8 MB| + +## References + +https://huggingface.co/approach0/splade_somemath-bertnsp-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-splade_somemath_cocomae_220_en.md b/docs/_posts/ahmedlone127/2023-09-13-splade_somemath_cocomae_220_en.md new file mode 100644 index 00000000000000..3d1debffa80e64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-splade_somemath_cocomae_220_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_somemath_cocomae_220 BertEmbeddings from approach0 +author: John Snow Labs +name: splade_somemath_cocomae_220 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_somemath_cocomae_220` is an English model originally trained by approach0. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_somemath_cocomae_220_en_5.1.1_3.0_1694634379900.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_somemath_cocomae_220_en_5.1.1_3.0_1694634379900.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_somemath_cocomae_220","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_somemath_cocomae_220", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_somemath_cocomae_220| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.9 MB| + +## References + +https://huggingface.co/approach0/splade_somemath-cocomae-220 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-structbert_large_en.md b/docs/_posts/ahmedlone127/2023-09-13-structbert_large_en.md new file mode 100644 index 00000000000000..14db2423484389 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-structbert_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English structbert_large BertEmbeddings from bayartsogt +author: John Snow Labs +name: structbert_large +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `structbert_large` is an English model originally trained by bayartsogt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/structbert_large_en_5.1.1_3.0_1694586021635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/structbert_large_en_5.1.1_3.0_1694586021635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("structbert_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("structbert_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|structbert_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/bayartsogt/structbert-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-sw_v1_sw.md b/docs/_posts/ahmedlone127/2023-09-13-sw_v1_sw.md new file mode 100644 index 00000000000000..1b5b9b390cacf3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-sw_v1_sw.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swahili (macrolanguage) sw_v1 BertEmbeddings from eolang +author: John Snow Labs +name: sw_v1 +date: 2023-09-13 +tags: [bert, sw, open_source, fill_mask, onnx] +task: Embeddings +language: sw +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sw_v1` is a Swahili (macrolanguage) model originally trained by eolang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sw_v1_sw_5.1.1_3.0_1694578806650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sw_v1_sw_5.1.1_3.0_1694578806650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sw_v1","sw") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sw_v1", "sw") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sw_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sw| +|Size:|408.0 MB| + +## References + +https://huggingface.co/eolang/SW-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tamil_bert_ta.md b/docs/_posts/ahmedlone127/2023-09-13-tamil_bert_ta.md new file mode 100644 index 00000000000000..8cb5121ac903b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tamil_bert_ta.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Tamil tamil_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: tamil_bert +date: 2023-09-13 +tags: [bert, ta, open_source, fill_mask, onnx] +task: Embeddings +language: ta +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tamil_bert` is a Tamil model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tamil_bert_ta_5.1.1_3.0_1694641183579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tamil_bert_ta_5.1.1_3.0_1694641183579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tamil_bert","ta") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tamil_bert", "ta") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tamil_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ta| +|Size:|890.7 MB| + +## References + +https://huggingface.co/l3cube-pune/tamil-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-telugu_bert_te.md b/docs/_posts/ahmedlone127/2023-09-13-telugu_bert_te.md new file mode 100644 index 00000000000000..c79075d944c5d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-telugu_bert_te.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Telugu telugu_bert BertEmbeddings from l3cube-pune +author: John Snow Labs +name: telugu_bert +date: 2023-09-13 +tags: [bert, te, open_source, fill_mask, onnx] +task: Embeddings +language: te +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `telugu_bert` is a Telugu model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/telugu_bert_te_5.1.1_3.0_1694639562892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/telugu_bert_te_5.1.1_3.0_1694639562892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("telugu_bert","te") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("telugu_bert", "te") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|telugu_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|te| +|Size:|890.5 MB| + +## References + +https://huggingface.co/l3cube-pune/telugu-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-test1_stormzj_en.md b/docs/_posts/ahmedlone127/2023-09-13-test1_stormzj_en.md new file mode 100644 index 00000000000000..cf0ce36396b584 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-test1_stormzj_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test1_stormzj BertEmbeddings from StormZJ +author: John Snow Labs +name: test1_stormzj +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test1_stormzj` is an English model originally trained by StormZJ. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test1_stormzj_en_5.1.1_3.0_1694571808785.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test1_stormzj_en_5.1.1_3.0_1694571808785.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test1_stormzj","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test1_stormzj", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test1_stormzj| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/StormZJ/test1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-test_itamarl_en.md b/docs/_posts/ahmedlone127/2023-09-13-test_itamarl_en.md new file mode 100644 index 00000000000000..63efb16550a384 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-test_itamarl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_itamarl BertEmbeddings from Itamarl +author: John Snow Labs +name: test_itamarl +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_itamarl` is an English model originally trained by Itamarl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_itamarl_en_5.1.1_3.0_1694579297569.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_itamarl_en_5.1.1_3.0_1694579297569.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_itamarl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_itamarl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_itamarl| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Itamarl/test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-test_model_allispaul_en.md b/docs/_posts/ahmedlone127/2023-09-13-test_model_allispaul_en.md new file mode 100644 index 00000000000000..f76ff5249edd24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-test_model_allispaul_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_model_allispaul BertEmbeddings from allispaul +author: John Snow Labs +name: test_model_allispaul +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_model_allispaul` is an English model originally trained by allispaul. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_model_allispaul_en_5.1.1_3.0_1694625763081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_model_allispaul_en_5.1.1_3.0_1694625763081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_model_allispaul","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_model_allispaul", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_model_allispaul| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/allispaul/test-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-test_telsayed_en.md b/docs/_posts/ahmedlone127/2023-09-13-test_telsayed_en.md new file mode 100644 index 00000000000000..a4d1b32d58d99d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-test_telsayed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_telsayed BertEmbeddings from telsayed +author: John Snow Labs +name: test_telsayed +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_telsayed` is an English model originally trained by telsayed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_telsayed_en_5.1.1_3.0_1694576902353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_telsayed_en_5.1.1_3.0_1694576902353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_telsayed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_telsayed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_telsayed| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/telsayed/test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-testc8_1_en.md b/docs/_posts/ahmedlone127/2023-09-13-testc8_1_en.md new file mode 100644 index 00000000000000..8f4b77e91055a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-testc8_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English testc8_1 BertEmbeddings from shafin +author: John Snow Labs +name: testc8_1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `testc8_1` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testc8_1_en_5.1.1_3.0_1694648911715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testc8_1_en_5.1.1_3.0_1694648911715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("testc8_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("testc8_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testc8_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/shafin/testc8-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-testc8_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-testc8_2_en.md new file mode 100644 index 00000000000000..b3e819239f89cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-testc8_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English testc8_2 BertEmbeddings from shafin +author: John Snow Labs +name: testc8_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `testc8_2` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/testc8_2_en_5.1.1_3.0_1694649179749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/testc8_2_en_5.1.1_3.0_1694649179749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("testc8_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("testc8_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|testc8_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.1 MB| + +## References + +https://huggingface.co/shafin/testc8-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tfhbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-tfhbert_en.md new file mode 100644 index 00000000000000..0ee4ae7d344486 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tfhbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tfhbert BertEmbeddings from hsc748NLP +author: John Snow Labs +name: tfhbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tfhbert` is an English model originally trained by hsc748NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tfhbert_en_5.1.1_3.0_1694599490702.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tfhbert_en_5.1.1_3.0_1694599490702.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tfhbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tfhbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tfhbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/hsc748NLP/TfhBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tibert_base_ti.md b/docs/_posts/ahmedlone127/2023-09-13-tibert_base_ti.md new file mode 100644 index 00000000000000..a3dc47f3afca22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tibert_base_ti.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Tigrinya tibert_base BertEmbeddings from fgaim +author: John Snow Labs +name: tibert_base +date: 2023-09-13 +tags: [bert, ti, open_source, fill_mask, onnx] +task: Embeddings +language: ti +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tibert_base` is a Tigrinya model originally trained by fgaim. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tibert_base_ti_5.1.1_3.0_1694637403635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tibert_base_ti_5.1.1_3.0_1694637403635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tibert_base","ti") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tibert_base", "ti") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tibert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ti| +|Size:|407.8 MB| + +## References + +https://huggingface.co/fgaim/tibert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_a_2_h_2_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_a_2_h_2_en.md new file mode 100644 index 00000000000000..e9217ac1e45314 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_a_2_h_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_a_2_h_2 BertEmbeddings from KamrusSamad +author: John Snow Labs +name: tiny_a_2_h_2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_a_2_h_2` is an English model originally trained by KamrusSamad. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_a_2_h_2_en_5.1.1_3.0_1694564331760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_a_2_h_2_en_5.1.1_3.0_1694564331760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_a_2_h_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_a_2_h_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_a_2_h_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/KamrusSamad/tiny_A-2_H-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_biobert_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_biobert_en.md new file mode 100644 index 00000000000000..a8f073ec4d30d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_biobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_biobert BertEmbeddings from nlpie +author: John Snow Labs +name: tiny_biobert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_biobert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_biobert_en_5.1.1_3.0_1694576110775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_biobert_en_5.1.1_3.0_1694576110775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_biobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_biobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_biobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|51.9 MB| + +## References + +https://huggingface.co/nlpie/tiny-biobert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_clinicalbert_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_clinicalbert_en.md new file mode 100644 index 00000000000000..47f42829007e45 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_clinicalbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_clinicalbert BertEmbeddings from nlpie +author: John Snow Labs +name: tiny_clinicalbert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_clinicalbert` is an English model originally trained by nlpie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_clinicalbert_en_5.1.1_3.0_1694573934228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_clinicalbert_en_5.1.1_3.0_1694573934228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_clinicalbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_clinicalbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_clinicalbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|51.9 MB| + +## References + +https://huggingface.co/nlpie/tiny-clinicalbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md b/docs/_posts/ahmedlone127/2023-09-13-tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md new file mode 100644 index 00000000000000..4fa075f99a585a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab` is an English model originally trained by muhtasham. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563249016.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab_en_5.1.1_3.0_1694563249016.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_glue_wnli_from_scratch_custom_tokenizer_expand_vocab| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-glue-wnli-from-scratch-custom-tokenizer-expand-vocab \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tinybert_general_4l_312d_german_de.md b/docs/_posts/ahmedlone127/2023-09-13-tinybert_general_4l_312d_german_de.md new file mode 100644 index 00000000000000..2deadf696e9db2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tinybert_general_4l_312d_german_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German tinybert_general_4l_312d_german BertEmbeddings from dvm1983 +author: John Snow Labs +name: tinybert_general_4l_312d_german +date: 2023-09-13 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tinybert_general_4l_312d_german` is a German model originally trained by dvm1983. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinybert_general_4l_312d_german_de_5.1.1_3.0_1694629239176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinybert_general_4l_312d_german_de_5.1.1_3.0_1694629239176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tinybert_general_4l_312d_german","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tinybert_general_4l_312d_german", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinybert_general_4l_312d_german| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|54.5 MB| + +## References + +https://huggingface.co/dvm1983/TinyBERT_General_4L_312D_de \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tinybert_general_6l_768d_finetuned_wikitext103_en.md b/docs/_posts/ahmedlone127/2023-09-13-tinybert_general_6l_768d_finetuned_wikitext103_en.md new file mode 100644 index 00000000000000..8fe297ac5f96af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tinybert_general_6l_768d_finetuned_wikitext103_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tinybert_general_6l_768d_finetuned_wikitext103 BertEmbeddings from saghar +author: John Snow Labs +name: tinybert_general_6l_768d_finetuned_wikitext103 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tinybert_general_6l_768d_finetuned_wikitext103` is an English model originally trained by saghar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinybert_general_6l_768d_finetuned_wikitext103_en_5.1.1_3.0_1694614398325.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinybert_general_6l_768d_finetuned_wikitext103_en_5.1.1_3.0_1694614398325.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tinybert_general_6l_768d_finetuned_wikitext103","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tinybert_general_6l_768d_finetuned_wikitext103", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinybert_general_6l_768d_finetuned_wikitext103| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/saghar/TinyBERT_General_6L_768D-finetuned-wikitext103 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tinybert_l_4_h_312_v2_finetuned_wikitext103_en.md b/docs/_posts/ahmedlone127/2023-09-13-tinybert_l_4_h_312_v2_finetuned_wikitext103_en.md new file mode 100644 index 00000000000000..617821d05bb8d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tinybert_l_4_h_312_v2_finetuned_wikitext103_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tinybert_l_4_h_312_v2_finetuned_wikitext103 BertEmbeddings from saghar +author: John Snow Labs +name: tinybert_l_4_h_312_v2_finetuned_wikitext103 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tinybert_l_4_h_312_v2_finetuned_wikitext103` is an English model originally trained by saghar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tinybert_l_4_h_312_v2_finetuned_wikitext103_en_5.1.1_3.0_1694615065926.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tinybert_l_4_h_312_v2_finetuned_wikitext103_en_5.1.1_3.0_1694615065926.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tinybert_l_4_h_312_v2_finetuned_wikitext103","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tinybert_l_4_h_312_v2_finetuned_wikitext103", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tinybert_l_4_h_312_v2_finetuned_wikitext103| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|53.8 MB| + +## References + +https://huggingface.co/saghar/TinyBERT_L-4_H-312_v2-finetuned-wikitext103 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_large_scale_en.md new file mode 100644 index 00000000000000..de0465f4fac5e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_ag_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_ag_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_ag_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_ag_large_scale_en_5.1.1_3.0_1694588153957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_ag_large_scale_en_5.1.1_3.0_1694588153957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_ag_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_ag_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_ag_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-ag-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_medium_scale_en.md new file mode 100644 index 00000000000000..49b628bcf918ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_ag_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_ag_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_ag_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_ag_medium_scale_en_5.1.1_3.0_1694588336137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_ag_medium_scale_en_5.1.1_3.0_1694588336137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_ag_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_ag_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_ag_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/yxchar/tlm-ag-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_small_scale_en.md new file mode 100644 index 00000000000000..93318a8794221f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_ag_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_ag_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_ag_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_ag_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_ag_small_scale_en_5.1.1_3.0_1694588495778.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_ag_small_scale_en_5.1.1_3.0_1694588495778.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_ag_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_ag_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_ag_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-ag-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_large_scale_en.md new file mode 100644 index 00000000000000..b9c91319cbb7c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_amazon_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_amazon_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_amazon_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_amazon_large_scale_en_5.1.1_3.0_1694588840311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_amazon_large_scale_en_5.1.1_3.0_1694588840311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_amazon_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_amazon_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_amazon_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-amazon-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_medium_scale_en.md new file mode 100644 index 00000000000000..8ed4a8d6bff3a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_amazon_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_amazon_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_amazon_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_amazon_medium_scale_en_5.1.1_3.0_1694588986337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_amazon_medium_scale_en_5.1.1_3.0_1694588986337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_amazon_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_amazon_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_amazon_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/yxchar/tlm-amazon-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_small_scale_en.md new file mode 100644 index 00000000000000..91e7a56ab6523d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_amazon_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_amazon_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_amazon_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_amazon_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_amazon_small_scale_en_5.1.1_3.0_1694589149911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_amazon_small_scale_en_5.1.1_3.0_1694589149911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_amazon_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_amazon_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_amazon_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-amazon-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_large_scale_en.md new file mode 100644 index 00000000000000..81e55ae4a9ab29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_chemprot_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_chemprot_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_chemprot_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_chemprot_large_scale_en_5.1.1_3.0_1694589506049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_chemprot_large_scale_en_5.1.1_3.0_1694589506049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_chemprot_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_chemprot_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_chemprot_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-chemprot-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_medium_scale_en.md new file mode 100644 index 00000000000000..980cb100621e97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_chemprot_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_chemprot_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_chemprot_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_chemprot_medium_scale_en_5.1.1_3.0_1694589697972.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_chemprot_medium_scale_en_5.1.1_3.0_1694589697972.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_chemprot_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_chemprot_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_chemprot_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-chemprot-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_small_scale_en.md new file mode 100644 index 00000000000000..c414caa64d0b8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_chemprot_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_chemprot_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_chemprot_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_chemprot_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_chemprot_small_scale_en_5.1.1_3.0_1694589852523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_chemprot_small_scale_en_5.1.1_3.0_1694589852523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_chemprot_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_chemprot_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_chemprot_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-chemprot-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_large_scale_en.md new file mode 100644 index 00000000000000..7a984aea63d88a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_citation_intent_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_citation_intent_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_citation_intent_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_citation_intent_large_scale_en_5.1.1_3.0_1694590161911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_citation_intent_large_scale_en_5.1.1_3.0_1694590161911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_citation_intent_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_citation_intent_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_citation_intent_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-citation_intent-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_medium_scale_en.md new file mode 100644 index 00000000000000..a4fb9d1f03d770 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_citation_intent_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_citation_intent_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_citation_intent_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_citation_intent_medium_scale_en_5.1.1_3.0_1694590345221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_citation_intent_medium_scale_en_5.1.1_3.0_1694590345221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_citation_intent_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_citation_intent_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_citation_intent_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-citation_intent-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_small_scale_en.md new file mode 100644 index 00000000000000..f7df55ee82308c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_citation_intent_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_citation_intent_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_citation_intent_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_citation_intent_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_citation_intent_small_scale_en_5.1.1_3.0_1694590503788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_citation_intent_small_scale_en_5.1.1_3.0_1694590503788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_citation_intent_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_citation_intent_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_citation_intent_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-citation_intent-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_large_scale_en.md new file mode 100644 index 00000000000000..4855486648c0c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_hyp_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_hyp_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_hyp_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_hyp_large_scale_en_5.1.1_3.0_1694590870329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_hyp_large_scale_en_5.1.1_3.0_1694590870329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_hyp_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_hyp_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_hyp_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-hyp-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_medium_scale_en.md new file mode 100644 index 00000000000000..7cfb252c763d5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_hyp_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_hyp_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_hyp_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_hyp_medium_scale_en_5.1.1_3.0_1694591052250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_hyp_medium_scale_en_5.1.1_3.0_1694591052250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_hyp_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_hyp_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_hyp_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-hyp-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_small_scale_en.md new file mode 100644 index 00000000000000..08288300a0a9ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_hyp_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_hyp_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_hyp_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_hyp_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_hyp_small_scale_en_5.1.1_3.0_1694591197892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_hyp_small_scale_en_5.1.1_3.0_1694591197892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_hyp_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_hyp_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_hyp_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.3 MB| + +## References + +https://huggingface.co/yxchar/tlm-hyp-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_large_scale_en.md new file mode 100644 index 00000000000000..8e904c407fc1fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_imdb_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_imdb_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_imdb_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_imdb_large_scale_en_5.1.1_3.0_1694591528950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_imdb_large_scale_en_5.1.1_3.0_1694591528950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_imdb_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_imdb_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_imdb_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-imdb-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_medium_scale_en.md new file mode 100644 index 00000000000000..04fc43ae5abd4c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_imdb_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_imdb_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_imdb_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_imdb_medium_scale_en_5.1.1_3.0_1694591680411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_imdb_medium_scale_en_5.1.1_3.0_1694591680411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_imdb_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_imdb_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_imdb_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-imdb-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_small_scale_en.md new file mode 100644 index 00000000000000..efd07eeeb510bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_imdb_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_imdb_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_imdb_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_imdb_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_imdb_small_scale_en_5.1.1_3.0_1694591852708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_imdb_small_scale_en_5.1.1_3.0_1694591852708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_imdb_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_imdb_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_imdb_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-imdb-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_large_scale_en.md new file mode 100644 index 00000000000000..407b317bf5fa2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_rct_20k_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_rct_20k_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_rct_20k_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_rct_20k_large_scale_en_5.1.1_3.0_1694592138039.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_rct_20k_large_scale_en_5.1.1_3.0_1694592138039.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_rct_20k_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_rct_20k_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_rct_20k_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-rct-20k-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_medium_scale_en.md new file mode 100644 index 00000000000000..3f50500679a8ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_rct_20k_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_rct_20k_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_rct_20k_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_rct_20k_medium_scale_en_5.1.1_3.0_1694592289324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_rct_20k_medium_scale_en_5.1.1_3.0_1694592289324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_rct_20k_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_rct_20k_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_rct_20k_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-rct-20k-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_small_scale_en.md new file mode 100644 index 00000000000000..cb90aa270ac363 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_rct_20k_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_rct_20k_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_rct_20k_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_rct_20k_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_rct_20k_small_scale_en_5.1.1_3.0_1694592467272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_rct_20k_small_scale_en_5.1.1_3.0_1694592467272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tlm_rct_20k_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_rct_20k_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_rct_20k_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-rct-20k-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_large_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_large_scale_en.md new file mode 100644 index 00000000000000..226b063661ca9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_large_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_sciie_large_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_sciie_large_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_sciie_large_scale` is an English model originally trained by yxchar. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_sciie_large_scale_en_5.1.1_3.0_1694592765586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_sciie_large_scale_en_5.1.1_3.0_1694592765586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tlm_sciie_large_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_sciie_large_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_sciie_large_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/yxchar/tlm-sciie-large-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_medium_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_medium_scale_en.md new file mode 100644 index 00000000000000..c48b9d11323049 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_medium_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_sciie_medium_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_sciie_medium_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_sciie_medium_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_sciie_medium_scale_en_5.1.1_3.0_1694592917809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_sciie_medium_scale_en_5.1.1_3.0_1694592917809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tlm_sciie_medium_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_sciie_medium_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_sciie_medium_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-sciie-medium-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_small_scale_en.md b/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_small_scale_en.md new file mode 100644 index 00000000000000..33ce8441f75e35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tlm_sciie_small_scale_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tlm_sciie_small_scale BertEmbeddings from yxchar +author: John Snow Labs +name: tlm_sciie_small_scale +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tlm_sciie_small_scale` is an English model originally trained by yxchar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tlm_sciie_small_scale_en_5.1.1_3.0_1694593064743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tlm_sciie_small_scale_en_5.1.1_3.0_1694593064743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tlm_sciie_small_scale","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tlm_sciie_small_scale", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tlm_sciie_small_scale| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.4 MB| + +## References + +https://huggingface.co/yxchar/tlm-sciie-small-scale \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tod_bert_jnt_v1_en.md b/docs/_posts/ahmedlone127/2023-09-13-tod_bert_jnt_v1_en.md new file mode 100644 index 00000000000000..69ed4ebf88f200 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tod_bert_jnt_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tod_bert_jnt_v1 BertEmbeddings from TODBERT +author: John Snow Labs +name: tod_bert_jnt_v1 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tod_bert_jnt_v1` is an English model originally trained by TODBERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tod_bert_jnt_v1_en_5.1.1_3.0_1694572063887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tod_bert_jnt_v1_en_5.1.1_3.0_1694572063887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tod_bert_jnt_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tod_bert_jnt_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tod_bert_jnt_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/TODBERT/TOD-BERT-JNT-V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-topic_erica_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-topic_erica_bert_en.md new file mode 100644 index 00000000000000..5cd4d3f913f1b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-topic_erica_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English topic_erica_bert BertEmbeddings from Kdogs +author: John Snow Labs +name: topic_erica_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `topic_erica_bert` is an English model originally trained by Kdogs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/topic_erica_bert_en_5.1.1_3.0_1694565067616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/topic_erica_bert_en_5.1.1_3.0_1694565067616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("topic_erica_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("topic_erica_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|topic_erica_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/Kdogs/topic_erica_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-transformer_exercise_01_en.md b/docs/_posts/ahmedlone127/2023-09-13-transformer_exercise_01_en.md new file mode 100644 index 00000000000000..45e31f24879095 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-transformer_exercise_01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English transformer_exercise_01 BertEmbeddings from alangganggang +author: John Snow Labs +name: transformer_exercise_01 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `transformer_exercise_01` is an English model originally trained by alangganggang. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/transformer_exercise_01_en_5.1.1_3.0_1694577887359.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/transformer_exercise_01_en_5.1.1_3.0_1694577887359.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("transformer_exercise_01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("transformer_exercise_01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|transformer_exercise_01| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/alangganggang/transformer_exercise_01 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tsonga_test2_en.md b/docs/_posts/ahmedlone127/2023-09-13-tsonga_test2_en.md new file mode 100644 index 00000000000000..a9f04732a4b352 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tsonga_test2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tsonga_test2 BertEmbeddings from taesu +author: John Snow Labs +name: tsonga_test2 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tsonga_test2` is an English model originally trained by taesu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tsonga_test2_en_5.1.1_3.0_1694576212078.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tsonga_test2_en_5.1.1_3.0_1694576212078.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tsonga_test2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tsonga_test2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tsonga_test2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.8 MB| + +## References + +https://huggingface.co/taesu/ts-test2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tsonga_test_en.md b/docs/_posts/ahmedlone127/2023-09-13-tsonga_test_en.md new file mode 100644 index 00000000000000..acb9374ea0b290 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tsonga_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tsonga_test BertEmbeddings from taesu +author: John Snow Labs +name: tsonga_test +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tsonga_test` is an English model originally trained by taesu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tsonga_test_en_5.1.1_3.0_1694576071612.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tsonga_test_en_5.1.1_3.0_1694576071612.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tsonga_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tsonga_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tsonga_test| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/taesu/ts-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tulio_bert_es.md b/docs/_posts/ahmedlone127/2023-09-13-tulio_bert_es.md new file mode 100644 index 00000000000000..7d68cf213c0215 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tulio_bert_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish tulio_bert BertEmbeddings from jorgeortizfuentes +author: John Snow Labs +name: tulio_bert +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tulio_bert` is a Castilian, Spanish model originally trained by jorgeortizfuentes. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tulio_bert_es_5.1.1_3.0_1694570756756.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tulio_bert_es_5.1.1_3.0_1694570756756.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tulio_bert","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tulio_bert", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tulio_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.0 MB| + +## References + +https://huggingface.co/jorgeortizfuentes/tulio-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-tulio_chilean_spanish_bert_es.md b/docs/_posts/ahmedlone127/2023-09-13-tulio_chilean_spanish_bert_es.md new file mode 100644 index 00000000000000..66972e202146f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-tulio_chilean_spanish_bert_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish tulio_chilean_spanish_bert BertEmbeddings from dccuchile +author: John Snow Labs +name: tulio_chilean_spanish_bert +date: 2023-09-13 +tags: [bert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`tulio_chilean_spanish_bert` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tulio_chilean_spanish_bert_es_5.1.1_3.0_1694608432981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tulio_chilean_spanish_bert_es_5.1.1_3.0_1694608432981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("tulio_chilean_spanish_bert","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tulio_chilean_spanish_bert", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tulio_chilean_spanish_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|409.3 MB| + +## References + +https://huggingface.co/dccuchile/tulio-chilean-spanish-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-turkish_base_bert_uncased_tr.md b/docs/_posts/ahmedlone127/2023-09-13-turkish_base_bert_uncased_tr.md new file mode 100644 index 00000000000000..a61127112b0517 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-turkish_base_bert_uncased_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish turkish_base_bert_uncased BertEmbeddings from ytu-ce-cosmos +author: John Snow Labs +name: turkish_base_bert_uncased +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_base_bert_uncased` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_base_bert_uncased_tr_5.1.1_3.0_1694631273482.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_base_bert_uncased_tr_5.1.1_3.0_1694631273482.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("turkish_base_bert_uncased","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("turkish_base_bert_uncased", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_base_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|413.0 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-base-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-turkish_medium_bert_uncased_tr.md b/docs/_posts/ahmedlone127/2023-09-13-turkish_medium_bert_uncased_tr.md new file mode 100644 index 00000000000000..f2fc72a6a7920a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-turkish_medium_bert_uncased_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish turkish_medium_bert_uncased BertEmbeddings from ytu-ce-cosmos +author: John Snow Labs +name: turkish_medium_bert_uncased +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_medium_bert_uncased` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_medium_bert_uncased_tr_5.1.1_3.0_1694629908740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_medium_bert_uncased_tr_5.1.1_3.0_1694629908740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("turkish_medium_bert_uncased","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("turkish_medium_bert_uncased", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_medium_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|157.4 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-medium-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-turkish_mini_bert_uncased_tr.md b/docs/_posts/ahmedlone127/2023-09-13-turkish_mini_bert_uncased_tr.md new file mode 100644 index 00000000000000..38b0dace2beb3b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-turkish_mini_bert_uncased_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish turkish_mini_bert_uncased BertEmbeddings from ytu-ce-cosmos +author: John Snow Labs +name: turkish_mini_bert_uncased +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_mini_bert_uncased` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_mini_bert_uncased_tr_5.1.1_3.0_1694629425088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_mini_bert_uncased_tr_5.1.1_3.0_1694629425088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("turkish_mini_bert_uncased","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("turkish_mini_bert_uncased", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_mini_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|43.3 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-mini-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-turkish_small_bert_uncased_tr.md b/docs/_posts/ahmedlone127/2023-09-13-turkish_small_bert_uncased_tr.md new file mode 100644 index 00000000000000..7e662c5a7b01ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-turkish_small_bert_uncased_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish turkish_small_bert_uncased BertEmbeddings from ytu-ce-cosmos +author: John Snow Labs +name: turkish_small_bert_uncased +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_small_bert_uncased` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_small_bert_uncased_tr_5.1.1_3.0_1694629693791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_small_bert_uncased_tr_5.1.1_3.0_1694629693791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("turkish_small_bert_uncased","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("turkish_small_bert_uncased", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_small_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|109.9 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-small-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-turkish_tiny_bert_uncased_tr.md b/docs/_posts/ahmedlone127/2023-09-13-turkish_tiny_bert_uncased_tr.md new file mode 100644 index 00000000000000..8b1f6a2abe9735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-turkish_tiny_bert_uncased_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish turkish_tiny_bert_uncased BertEmbeddings from ytu-ce-cosmos +author: John Snow Labs +name: turkish_tiny_bert_uncased +date: 2023-09-13 +tags: [bert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`turkish_tiny_bert_uncased` is a Turkish model originally trained by ytu-ce-cosmos. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/turkish_tiny_bert_uncased_tr_5.1.1_3.0_1694629276501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/turkish_tiny_bert_uncased_tr_5.1.1_3.0_1694629276501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("turkish_tiny_bert_uncased","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("turkish_tiny_bert_uncased", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|turkish_tiny_bert_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|17.4 MB| + +## References + +https://huggingface.co/ytu-ce-cosmos/turkish-tiny-bert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-twteval_pretrained_en.md b/docs/_posts/ahmedlone127/2023-09-13-twteval_pretrained_en.md new file mode 100644 index 00000000000000..1f668566545b48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-twteval_pretrained_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English twteval_pretrained BertEmbeddings from ArnavL +author: John Snow Labs +name: twteval_pretrained +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`twteval_pretrained` is a English model originally trained by ArnavL. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/twteval_pretrained_en_5.1.1_3.0_1694608777119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/twteval_pretrained_en_5.1.1_3.0_1694608777119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("twteval_pretrained","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("twteval_pretrained", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|twteval_pretrained| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ArnavL/twteval-pretrained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-uzbert_base_uncased_uz.md b/docs/_posts/ahmedlone127/2023-09-13-uzbert_base_uncased_uz.md new file mode 100644 index 00000000000000..61739a056f0a81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-uzbert_base_uncased_uz.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Uzbek uzbert_base_uncased BertEmbeddings from coppercitylabs +author: John Snow Labs +name: uzbert_base_uncased +date: 2023-09-13 +tags: [bert, uz, open_source, fill_mask, onnx] +task: Embeddings +language: uz +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`uzbert_base_uncased` is a Uzbek model originally trained by coppercitylabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/uzbert_base_uncased_uz_5.1.1_3.0_1694594401573.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/uzbert_base_uncased_uz_5.1.1_3.0_1694594401573.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("uzbert_base_uncased","uz") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("uzbert_base_uncased", "uz") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|uzbert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|uz| +|Size:|406.8 MB| + +## References + +https://huggingface.co/coppercitylabs/uzbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-wiki_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-wiki_bert_en.md new file mode 100644 index 00000000000000..0489731b258f2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-wiki_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English wiki_bert BertEmbeddings from dhimskyy +author: John Snow Labs +name: wiki_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wiki_bert` is a English model originally trained by dhimskyy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wiki_bert_en_5.1.1_3.0_1694602328160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wiki_bert_en_5.1.1_3.0_1694602328160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("wiki_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wiki_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wiki_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|847.5 MB| + +## References + +https://huggingface.co/dhimskyy/wiki-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-wineberto_italian_cased_it.md b/docs/_posts/ahmedlone127/2023-09-13-wineberto_italian_cased_it.md new file mode 100644 index 00000000000000..3909a41f8d79b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-wineberto_italian_cased_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian wineberto_italian_cased BertEmbeddings from vinhood +author: John Snow Labs +name: wineberto_italian_cased +date: 2023-09-13 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wineberto_italian_cased` is a Italian model originally trained by vinhood. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wineberto_italian_cased_it_5.1.1_3.0_1694581896429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wineberto_italian_cased_it_5.1.1_3.0_1694581896429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("wineberto_italian_cased","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wineberto_italian_cased", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wineberto_italian_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|412.6 MB| + +## References + +https://huggingface.co/vinhood/wineberto-italian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-wisdomify_en.md b/docs/_posts/ahmedlone127/2023-09-13-wisdomify_en.md new file mode 100644 index 00000000000000..a57d30e1af3118 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-wisdomify_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English wisdomify BertEmbeddings from wisdomify +author: John Snow Labs +name: wisdomify +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wisdomify` is a English model originally trained by wisdomify. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wisdomify_en_5.1.1_3.0_1694585645519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wisdomify_en_5.1.1_3.0_1694585645519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("wisdomify","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wisdomify", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wisdomify| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.2 MB| + +## References + +https://huggingface.co/wisdomify/wisdomify \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-wobert_chinese_plus_zh.md b/docs/_posts/ahmedlone127/2023-09-13-wobert_chinese_plus_zh.md new file mode 100644 index 00000000000000..1959e2400b1f9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-wobert_chinese_plus_zh.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Chinese wobert_chinese_plus BertEmbeddings from qinluo +author: John Snow Labs +name: wobert_chinese_plus +date: 2023-09-13 +tags: [bert, zh, open_source, fill_mask, onnx] +task: Embeddings +language: zh +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`wobert_chinese_plus` is a Chinese model originally trained by qinluo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wobert_chinese_plus_zh_5.1.1_3.0_1694565904418.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wobert_chinese_plus_zh_5.1.1_3.0_1694565904418.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("wobert_chinese_plus","zh") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wobert_chinese_plus", "zh") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wobert_chinese_plus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|zh| +|Size:|464.5 MB| + +## References + +https://huggingface.co/qinluo/wobert-chinese-plus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-writter_bert_hep_en.md b/docs/_posts/ahmedlone127/2023-09-13-writter_bert_hep_en.md new file mode 100644 index 00000000000000..a2c65af1607008 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-writter_bert_hep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English writter_bert_hep BertEmbeddings from munozariasjm +author: John Snow Labs +name: writter_bert_hep +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`writter_bert_hep` is a English model originally trained by munozariasjm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/writter_bert_hep_en_5.1.1_3.0_1694626233603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/writter_bert_hep_en_5.1.1_3.0_1694626233603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("writter_bert_hep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("writter_bert_hep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|writter_bert_hep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|410.0 MB| + +## References + +https://huggingface.co/munozariasjm/writter_bert_hep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-xtremedistil_l12_h384_uncased_finetuned_wikitext103_en.md b/docs/_posts/ahmedlone127/2023-09-13-xtremedistil_l12_h384_uncased_finetuned_wikitext103_en.md new file mode 100644 index 00000000000000..966362a70b6660 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-xtremedistil_l12_h384_uncased_finetuned_wikitext103_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English xtremedistil_l12_h384_uncased_finetuned_wikitext103 BertEmbeddings from saghar +author: John Snow Labs +name: xtremedistil_l12_h384_uncased_finetuned_wikitext103 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xtremedistil_l12_h384_uncased_finetuned_wikitext103` is a English model originally trained by saghar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xtremedistil_l12_h384_uncased_finetuned_wikitext103_en_5.1.1_3.0_1694616486581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xtremedistil_l12_h384_uncased_finetuned_wikitext103_en_5.1.1_3.0_1694616486581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("xtremedistil_l12_h384_uncased_finetuned_wikitext103","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("xtremedistil_l12_h384_uncased_finetuned_wikitext103", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xtremedistil_l12_h384_uncased_finetuned_wikitext103| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|123.6 MB| + +## References + +https://huggingface.co/saghar/xtremedistil-l12-h384-uncased-finetuned-wikitext103 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-xtremedistil_l6_h384_uncased_finetuned_wikitext103_en.md b/docs/_posts/ahmedlone127/2023-09-13-xtremedistil_l6_h384_uncased_finetuned_wikitext103_en.md new file mode 100644 index 00000000000000..478ff4678dc65a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-xtremedistil_l6_h384_uncased_finetuned_wikitext103_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English xtremedistil_l6_h384_uncased_finetuned_wikitext103 BertEmbeddings from saghar +author: John Snow Labs +name: xtremedistil_l6_h384_uncased_finetuned_wikitext103 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`xtremedistil_l6_h384_uncased_finetuned_wikitext103` is a English model originally trained by saghar. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/xtremedistil_l6_h384_uncased_finetuned_wikitext103_en_5.1.1_3.0_1694616303708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/xtremedistil_l6_h384_uncased_finetuned_wikitext103_en_5.1.1_3.0_1694616303708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("xtremedistil_l6_h384_uncased_finetuned_wikitext103","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("xtremedistil_l6_h384_uncased_finetuned_wikitext103", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|xtremedistil_l6_h384_uncased_finetuned_wikitext103| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|83.6 MB| + +## References + +https://huggingface.co/saghar/xtremedistil-l6-h384-uncased-finetuned-wikitext103 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-yelpy_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-yelpy_bert_en.md new file mode 100644 index 00000000000000..2283ab40fddadc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-yelpy_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English yelpy_bert BertEmbeddings from rttl-ai +author: John Snow Labs +name: yelpy_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yelpy_bert` is a English model originally trained by rttl-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yelpy_bert_en_5.1.1_3.0_1694583064434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yelpy_bert_en_5.1.1_3.0_1694583064434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("yelpy_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("yelpy_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yelpy_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/rttl-ai/yelpy-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-youtube_bert_10_en.md b/docs/_posts/ahmedlone127/2023-09-13-youtube_bert_10_en.md new file mode 100644 index 00000000000000..50a91796587ee1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-youtube_bert_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English youtube_bert_10 BertEmbeddings from flboehm +author: John Snow Labs +name: youtube_bert_10 +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `youtube_bert_10` is an English model originally trained by flboehm. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/youtube_bert_10_en_5.1.1_3.0_1694646466454.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/youtube_bert_10_en_5.1.1_3.0_1694646466454.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("youtube_bert_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("youtube_bert_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|youtube_bert_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/flboehm/youtube-bert_10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-13-youtube_bert_en.md b/docs/_posts/ahmedlone127/2023-09-13-youtube_bert_en.md new file mode 100644 index 00000000000000..ffd52bceb7b195 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-13-youtube_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English youtube_bert BertEmbeddings from flboehm +author: John Snow Labs +name: youtube_bert +date: 2023-09-13 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `youtube_bert` is an English model originally trained by flboehm. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/youtube_bert_en_5.1.1_3.0_1694646017109.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/youtube_bert_en_5.1.1_3.0_1694646017109.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("youtube_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("youtube_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|youtube_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/flboehm/youtube-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-9.4aistudy_en.md b/docs/_posts/ahmedlone127/2023-09-14-9.4aistudy_en.md new file mode 100644 index 00000000000000..a4c07e832d1b47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-9.4aistudy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 9.4aistudy BertEmbeddings from hangmu +author: John Snow Labs +name: 9.4aistudy +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `9.4aistudy` is an English model originally trained by hangmu. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/9.4aistudy_en_5.1.1_3.0_1694652490572.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/9.4aistudy_en_5.1.1_3.0_1694652490572.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("9.4aistudy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("9.4aistudy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|9.4aistudy| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/hangmu/9.4AIstudy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-AethiQs_GemBERT_bertje_50k_en.md b/docs/_posts/ahmedlone127/2023-09-14-AethiQs_GemBERT_bertje_50k_en.md new file mode 100644 index 00000000000000..b85b38307e8310 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-AethiQs_GemBERT_bertje_50k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English AethiQs_GemBERT_bertje_50k BertEmbeddings from AethiQs-Max +author: John Snow Labs +name: AethiQs_GemBERT_bertje_50k +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `AethiQs_GemBERT_bertje_50k` is an English model originally trained by AethiQs-Max. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/AethiQs_GemBERT_bertje_50k_en_5.1.1_3.0_1694692996070.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/AethiQs_GemBERT_bertje_50k_en_5.1.1_3.0_1694692996070.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("AethiQs_GemBERT_bertje_50k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("AethiQs_GemBERT_bertje_50k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|AethiQs_GemBERT_bertje_50k| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.8 MB| + +## References + +https://huggingface.co/AethiQs-Max/AethiQs_GemBERT_bertje_50k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_10_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_10_epochs_en.md new file mode 100644 index 00000000000000..0886b811923735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_10_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English AlephBertGimmel_10_epochs BertEmbeddings from Embible +author: John Snow Labs +name: AlephBertGimmel_10_epochs +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `AlephBertGimmel_10_epochs` is an English model originally trained by Embible. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/AlephBertGimmel_10_epochs_en_5.1.1_3.0_1694693579710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/AlephBertGimmel_10_epochs_en_5.1.1_3.0_1694693579710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("AlephBertGimmel_10_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("AlephBertGimmel_10_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|AlephBertGimmel_10_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.3 MB| + +## References + +https://huggingface.co/Embible/AlephBertGimmel-10-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_20_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_20_epochs_en.md new file mode 100644 index 00000000000000..daf85f4af214f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_20_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English AlephBertGimmel_20_epochs BertEmbeddings from Embible +author: John Snow Labs +name: AlephBertGimmel_20_epochs +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `AlephBertGimmel_20_epochs` is an English model originally trained by Embible. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/AlephBertGimmel_20_epochs_en_5.1.1_3.0_1694693813229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/AlephBertGimmel_20_epochs_en_5.1.1_3.0_1694693813229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("AlephBertGimmel_20_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("AlephBertGimmel_20_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|AlephBertGimmel_20_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.3 MB| + +## References + +https://huggingface.co/Embible/AlephBertGimmel-20-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_50_epochs_en.md b/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_50_epochs_en.md new file mode 100644 index 00000000000000..d7d958a3bbfdfb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-AlephBertGimmel_50_epochs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English AlephBertGimmel_50_epochs BertEmbeddings from Embible +author: John Snow Labs +name: AlephBertGimmel_50_epochs +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `AlephBertGimmel_50_epochs` is an English model originally trained by Embible. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/AlephBertGimmel_50_epochs_en_5.1.1_3.0_1694694021882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/AlephBertGimmel_50_epochs_en_5.1.1_3.0_1694694021882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("AlephBertGimmel_50_epochs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("AlephBertGimmel_50_epochs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|AlephBertGimmel_50_epochs| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|690.0 MB| + +## References + +https://huggingface.co/Embible/AlephBertGimmel-50-epochs \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BERTUNAM_en.md b/docs/_posts/ahmedlone127/2023-09-14-BERTUNAM_en.md new file mode 100644 index 00000000000000..49913ead495fe5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BERTUNAM_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BERTUNAM BertEmbeddings from benanxio +author: John Snow Labs +name: BERTUNAM +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `BERTUNAM` is an English model originally trained by benanxio. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BERTUNAM_en_5.1.1_3.0_1694692836777.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BERTUNAM_en_5.1.1_3.0_1694692836777.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("BERTUNAM","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("BERTUNAM", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BERTUNAM| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/benanxio/BERTUNAM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BERT_Base_CT_en.md b/docs/_posts/ahmedlone127/2023-09-14-BERT_Base_CT_en.md new file mode 100644 index 00000000000000..ba781f70be62c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BERT_Base_CT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BERT_Base_CT BertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: BERT_Base_CT +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `BERT_Base_CT` is an English model originally trained by Contrastive-Tension. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BERT_Base_CT_en_5.1.1_3.0_1694693914687.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BERT_Base_CT_en_5.1.1_3.0_1694693914687.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("BERT_Base_CT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("BERT_Base_CT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BERT_Base_CT| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Base-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BERT_Base_NLI_CT_en.md b/docs/_posts/ahmedlone127/2023-09-14-BERT_Base_NLI_CT_en.md new file mode 100644 index 00000000000000..5da2fbd5db95a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BERT_Base_NLI_CT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BERT_Base_NLI_CT BertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: BERT_Base_NLI_CT +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `BERT_Base_NLI_CT` is an English model originally trained by Contrastive-Tension. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BERT_Base_NLI_CT_en_5.1.1_3.0_1694694097832.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BERT_Base_NLI_CT_en_5.1.1_3.0_1694694097832.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("BERT_Base_NLI_CT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("BERT_Base_NLI_CT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BERT_Base_NLI_CT| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Base-NLI-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BERT_Distil_CT_en.md b/docs/_posts/ahmedlone127/2023-09-14-BERT_Distil_CT_en.md new file mode 100644 index 00000000000000..bade2aee450677 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BERT_Distil_CT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BERT_Distil_CT DistilBertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: BERT_Distil_CT +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `BERT_Distil_CT` is an English model originally trained by Contrastive-Tension. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BERT_Distil_CT_en_5.1.2_3.0_1694735209512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BERT_Distil_CT_en_5.1.2_3.0_1694735209512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("BERT_Distil_CT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("BERT_Distil_CT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BERT_Distil_CT| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Distil-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BERT_Distil_NLI_CT_en.md b/docs/_posts/ahmedlone127/2023-09-14-BERT_Distil_NLI_CT_en.md new file mode 100644 index 00000000000000..87bb6cbf1ba385 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BERT_Distil_NLI_CT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BERT_Distil_NLI_CT DistilBertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: BERT_Distil_NLI_CT +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `BERT_Distil_NLI_CT` is an English model originally trained by Contrastive-Tension. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BERT_Distil_NLI_CT_en_5.1.2_3.0_1694735318073.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BERT_Distil_NLI_CT_en_5.1.2_3.0_1694735318073.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("BERT_Distil_NLI_CT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("BERT_Distil_NLI_CT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BERT_Distil_NLI_CT| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Distil-NLI-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BERT_Large_CT_en.md b/docs/_posts/ahmedlone127/2023-09-14-BERT_Large_CT_en.md new file mode 100644 index 00000000000000..6db7fabbd1f717 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BERT_Large_CT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BERT_Large_CT BertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: BERT_Large_CT +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`BERT_Large_CT` is a English model originally trained by Contrastive-Tension. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BERT_Large_CT_en_5.1.1_3.0_1694694383132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BERT_Large_CT_en_5.1.1_3.0_1694694383132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("BERT_Large_CT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("BERT_Large_CT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BERT_Large_CT| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Large-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-BiodivBERT_en.md b/docs/_posts/ahmedlone127/2023-09-14-BiodivBERT_en.md new file mode 100644 index 00000000000000..1bec027e681000 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-BiodivBERT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English BiodivBERT BertEmbeddings from NoYo25 +author: John Snow Labs +name: BiodivBERT +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`BiodivBERT` is a English model originally trained by NoYo25. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/BiodivBERT_en_5.1.1_3.0_1694693518067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/BiodivBERT_en_5.1.1_3.0_1694693518067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("BiodivBERT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("BiodivBERT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|BiodivBERT| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/NoYo25/BiodivBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-DistilBert_Finetuned_SpMLM_en.md b/docs/_posts/ahmedlone127/2023-09-14-DistilBert_Finetuned_SpMLM_en.md new file mode 100644 index 00000000000000..8a400ec9cac5ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-DistilBert_Finetuned_SpMLM_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English DistilBert_Finetuned_SpMLM DistilBertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: DistilBert_Finetuned_SpMLM +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`DistilBert_Finetuned_SpMLM` is a English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/DistilBert_Finetuned_SpMLM_en_5.1.2_3.0_1694735812642.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/DistilBert_Finetuned_SpMLM_en_5.1.2_3.0_1694735812642.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("DistilBert_Finetuned_SpMLM","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("DistilBert_Finetuned_SpMLM", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|DistilBert_Finetuned_SpMLM| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/ashwathjadhav23/DistilBert_Finetuned_SpMLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-InLegalBERT_cbp_lkg_triples_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-14-InLegalBERT_cbp_lkg_triples_finetuned_en.md new file mode 100644 index 00000000000000..e77417718d3a58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-InLegalBERT_cbp_lkg_triples_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English InLegalBERT_cbp_lkg_triples_finetuned BertEmbeddings from kinshuk-h +author: John Snow Labs +name: InLegalBERT_cbp_lkg_triples_finetuned +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`InLegalBERT_cbp_lkg_triples_finetuned` is a English model originally trained by kinshuk-h. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/InLegalBERT_cbp_lkg_triples_finetuned_en_5.1.1_3.0_1694694447567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/InLegalBERT_cbp_lkg_triples_finetuned_en_5.1.1_3.0_1694694447567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("InLegalBERT_cbp_lkg_triples_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("InLegalBERT_cbp_lkg_triples_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|InLegalBERT_cbp_lkg_triples_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/kinshuk-h/InLegalBERT-cbp-lkg-triples-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-JavaBERT_en.md b/docs/_posts/ahmedlone127/2023-09-14-JavaBERT_en.md new file mode 100644 index 00000000000000..367a5d85fd1d59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-JavaBERT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English JavaBERT BertEmbeddings from CAUKiel +author: John Snow Labs +name: JavaBERT +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`JavaBERT` is a English model originally trained by CAUKiel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/JavaBERT_en_5.1.1_3.0_1694693773348.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/JavaBERT_en_5.1.1_3.0_1694693773348.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("JavaBERT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("JavaBERT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|JavaBERT| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/CAUKiel/JavaBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-JavaBERT_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-14-JavaBERT_uncased_en.md new file mode 100644 index 00000000000000..f7a98bf80dcd64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-JavaBERT_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English JavaBERT_uncased BertEmbeddings from CAUKiel +author: John Snow Labs +name: JavaBERT_uncased +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`JavaBERT_uncased` is a English model originally trained by CAUKiel. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/JavaBERT_uncased_en_5.1.1_3.0_1694693619422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/JavaBERT_uncased_en_5.1.1_3.0_1694693619422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("JavaBERT_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("JavaBERT_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|JavaBERT_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/CAUKiel/JavaBERT-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-Masked_Language_Model_en.md b/docs/_posts/ahmedlone127/2023-09-14-Masked_Language_Model_en.md new file mode 100644 index 00000000000000..b335eed5139d0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-Masked_Language_Model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English Masked_Language_Model DistilBertEmbeddings from ayoolaolafenwa +author: John Snow Labs +name: Masked_Language_Model +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`Masked_Language_Model` is a English model originally trained by ayoolaolafenwa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/Masked_Language_Model_en_5.1.2_3.0_1694735583713.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/Masked_Language_Model_en_5.1.2_3.0_1694735583713.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("Masked_Language_Model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("Masked_Language_Model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|Masked_Language_Model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ayoolaolafenwa/Masked-Language-Model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-Medicaldistilbert_en.md b/docs/_posts/ahmedlone127/2023-09-14-Medicaldistilbert_en.md new file mode 100644 index 00000000000000..5f616b7ca0398b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-Medicaldistilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English Medicaldistilbert DistilBertEmbeddings from Gaborandi +author: John Snow Labs +name: Medicaldistilbert +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`Medicaldistilbert` is a English model originally trained by Gaborandi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/Medicaldistilbert_en_5.1.2_3.0_1694735094024.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/Medicaldistilbert_en_5.1.2_3.0_1694735094024.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("Medicaldistilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("Medicaldistilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|Medicaldistilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Gaborandi/Medicaldistilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-UKRI_DistilBERT_en.md b/docs/_posts/ahmedlone127/2023-09-14-UKRI_DistilBERT_en.md new file mode 100644 index 00000000000000..4594ffb756423e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-UKRI_DistilBERT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English UKRI_DistilBERT DistilBertEmbeddings from Brawl +author: John Snow Labs +name: UKRI_DistilBERT +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`UKRI_DistilBERT` is a English model originally trained by Brawl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/UKRI_DistilBERT_en_5.1.2_3.0_1694734860415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/UKRI_DistilBERT_en_5.1.2_3.0_1694734860415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("UKRI_DistilBERT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("UKRI_DistilBERT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|UKRI_DistilBERT| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Brawl/UKRI_DistilBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-akkbert_en.md b/docs/_posts/ahmedlone127/2023-09-14-akkbert_en.md new file mode 100644 index 00000000000000..11ae896949b576 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-akkbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English akkbert BertEmbeddings from megamattc +author: John Snow Labs +name: akkbert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`akkbert` is a English model originally trained by megamattc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/akkbert_en_5.1.1_3.0_1694663523335.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/akkbert_en_5.1.1_3.0_1694663523335.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("akkbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("akkbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|akkbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|664.9 MB| + +## References + +https://huggingface.co/megamattc/AkkBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-alephbertgimmel_base_512_he.md b/docs/_posts/ahmedlone127/2023-09-14-alephbertgimmel_base_512_he.md new file mode 100644 index 00000000000000..591248169da26e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-alephbertgimmel_base_512_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew alephbertgimmel_base_512 BertEmbeddings from imvladikon +author: John Snow Labs +name: alephbertgimmel_base_512 +date: 2023-09-14 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alephbertgimmel_base_512` is a Hebrew model originally trained by imvladikon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alephbertgimmel_base_512_he_5.1.1_3.0_1694658101075.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alephbertgimmel_base_512_he_5.1.1_3.0_1694658101075.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("alephbertgimmel_base_512","he") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("alephbertgimmel_base_512", "he") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alephbertgimmel_base_512| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|690.4 MB| + +## References + +https://huggingface.co/imvladikon/alephbertgimmel-base-512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-aligner_english_vietnamese_en.md b/docs/_posts/ahmedlone127/2023-09-14-aligner_english_vietnamese_en.md new file mode 100644 index 00000000000000..1226fbe15e8e20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-aligner_english_vietnamese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English aligner_english_vietnamese BertEmbeddings from hdmt +author: John Snow Labs +name: aligner_english_vietnamese +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`aligner_english_vietnamese` is a English model originally trained by hdmt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aligner_english_vietnamese_en_5.1.1_3.0_1694655583036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aligner_english_vietnamese_en_5.1.1_3.0_1694655583036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("aligner_english_vietnamese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("aligner_english_vietnamese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aligner_english_vietnamese| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/hdmt/aligner-en-vi \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-antismetisim1_finetuned_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-14-antismetisim1_finetuned_mlm_en.md new file mode 100644 index 00000000000000..8a81e08a25e7ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-antismetisim1_finetuned_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English antismetisim1_finetuned_mlm BertEmbeddings from Dhanush66 +author: John Snow Labs +name: antismetisim1_finetuned_mlm +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `antismetisim1_finetuned_mlm` is an English model originally trained by Dhanush66. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/antismetisim1_finetuned_mlm_en_5.1.1_3.0_1694667541883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/antismetisim1_finetuned_mlm_en_5.1.1_3.0_1694667541883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("antismetisim1_finetuned_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("antismetisim1_finetuned_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|antismetisim1_finetuned_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Dhanush66/Antismetisim1-finetuned-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-antismetisimlargedata_finetuned_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-14-antismetisimlargedata_finetuned_mlm_en.md new file mode 100644 index 00000000000000..3283bf335f3c73 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-antismetisimlargedata_finetuned_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English antismetisimlargedata_finetuned_mlm BertEmbeddings from Dhanush66 +author: John Snow Labs +name: antismetisimlargedata_finetuned_mlm +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `antismetisimlargedata_finetuned_mlm` is an English model originally trained by Dhanush66. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/antismetisimlargedata_finetuned_mlm_en_5.1.1_3.0_1694669960122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/antismetisimlargedata_finetuned_mlm_en_5.1.1_3.0_1694669960122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("antismetisimlargedata_finetuned_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("antismetisimlargedata_finetuned_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|antismetisimlargedata_finetuned_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|475.5 MB| + +## References + +https://huggingface.co/Dhanush66/AntismetisimLargedata-finetuned-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-applicationbert_en.md b/docs/_posts/ahmedlone127/2023-09-14-applicationbert_en.md new file mode 100644 index 00000000000000..400887f6307b1e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-applicationbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English applicationbert BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: applicationbert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `applicationbert` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/applicationbert_en_5.1.1_3.0_1694664670828.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/applicationbert_en_5.1.1_3.0_1694664670828.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("applicationbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("applicationbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|applicationbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/ApplicationBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_mberttok_en.md b/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_mberttok_en.md new file mode 100644 index 00000000000000..8c12c94259580b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_mberttok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ar_mbertmodel_mberttok BertEmbeddings from hgiyt +author: John Snow Labs +name: ar_mbertmodel_mberttok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ar_mbertmodel_mberttok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ar_mbertmodel_mberttok_en_5.1.1_3.0_1694692763225.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ar_mbertmodel_mberttok_en_5.1.1_3.0_1694692763225.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ar_mbertmodel_mberttok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ar_mbertmodel_mberttok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ar_mbertmodel_mberttok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|545.9 MB| + +## References + +https://huggingface.co/hgiyt/ar-mbertmodel-mberttok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_monotok_adapter_en.md b/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_monotok_adapter_en.md new file mode 100644 index 00000000000000..eb3ef53356892b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_monotok_adapter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ar_mbertmodel_monotok_adapter BertEmbeddings from hgiyt +author: John Snow Labs +name: ar_mbertmodel_monotok_adapter +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ar_mbertmodel_monotok_adapter` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ar_mbertmodel_monotok_adapter_en_5.1.1_3.0_1694692938173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ar_mbertmodel_monotok_adapter_en_5.1.1_3.0_1694692938173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ar_mbertmodel_monotok_adapter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ar_mbertmodel_monotok_adapter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ar_mbertmodel_monotok_adapter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.0 MB| + +## References + +https://huggingface.co/hgiyt/ar-mbertmodel-monotok-adapter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_monotok_en.md b/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_monotok_en.md new file mode 100644 index 00000000000000..1ade663e7d7b4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-ar_mbertmodel_monotok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ar_mbertmodel_monotok BertEmbeddings from hgiyt +author: John Snow Labs +name: ar_mbertmodel_monotok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ar_mbertmodel_monotok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ar_mbertmodel_monotok_en_5.1.1_3.0_1694693109864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ar_mbertmodel_monotok_en_5.1.1_3.0_1694693109864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ar_mbertmodel_monotok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ar_mbertmodel_monotok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ar_mbertmodel_monotok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.0 MB| + +## References + +https://huggingface.co/hgiyt/ar-mbertmodel-monotok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-ar_monomodel_mberttok_en.md b/docs/_posts/ahmedlone127/2023-09-14-ar_monomodel_mberttok_en.md new file mode 100644 index 00000000000000..c358116c4f7da8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-ar_monomodel_mberttok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ar_monomodel_mberttok BertEmbeddings from hgiyt +author: John Snow Labs +name: ar_monomodel_mberttok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ar_monomodel_mberttok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ar_monomodel_mberttok_en_5.1.1_3.0_1694693274766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ar_monomodel_mberttok_en_5.1.1_3.0_1694693274766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ar_monomodel_mberttok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ar_monomodel_mberttok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ar_monomodel_mberttok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|545.3 MB| + +## References + +https://huggingface.co/hgiyt/ar-monomodel-mberttok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-ar_monomodel_monotok_en.md b/docs/_posts/ahmedlone127/2023-09-14-ar_monomodel_monotok_en.md new file mode 100644 index 00000000000000..22cef2d18b56d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-ar_monomodel_monotok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ar_monomodel_monotok BertEmbeddings from hgiyt +author: John Snow Labs +name: ar_monomodel_monotok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ar_monomodel_monotok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ar_monomodel_monotok_en_5.1.1_3.0_1694693462819.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ar_monomodel_monotok_en_5.1.1_3.0_1694693462819.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ar_monomodel_monotok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ar_monomodel_monotok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ar_monomodel_monotok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|504.1 MB| + +## References + +https://huggingface.co/hgiyt/ar-monomodel-monotok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-archaeobert_en.md b/docs/_posts/ahmedlone127/2023-09-14-archaeobert_en.md new file mode 100644 index 00000000000000..cd24212f4ff18d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-archaeobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English archaeobert BertEmbeddings from alexbrandsen +author: John Snow Labs +name: archaeobert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `archaeobert` is an English model originally trained by alexbrandsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/archaeobert_en_5.1.1_3.0_1694650857417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/archaeobert_en_5.1.1_3.0_1694650857417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("archaeobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("archaeobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|archaeobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.5 MB| + +## References + +https://huggingface.co/alexbrandsen/ArchaeoBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-autotrain_acc_keys_2347073860_en.md b/docs/_posts/ahmedlone127/2023-09-14-autotrain_acc_keys_2347073860_en.md new file mode 100644 index 00000000000000..6230f96852d273 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-autotrain_acc_keys_2347073860_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English autotrain_acc_keys_2347073860 BertEmbeddings from alanila +author: John Snow Labs +name: autotrain_acc_keys_2347073860 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `autotrain_acc_keys_2347073860` is an English model originally trained by alanila. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_acc_keys_2347073860_en_5.1.1_3.0_1694660872048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_acc_keys_2347073860_en_5.1.1_3.0_1694660872048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("autotrain_acc_keys_2347073860","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("autotrain_acc_keys_2347073860", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_acc_keys_2347073860| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/alanila/autotrain-acc_keys-2347073860 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-awesome_align_with_corsican_xx.md b/docs/_posts/ahmedlone127/2023-09-14-awesome_align_with_corsican_xx.md new file mode 100644 index 00000000000000..2a88580dea8082 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-awesome_align_with_corsican_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual awesome_align_with_corsican BertEmbeddings from aneuraz +author: John Snow Labs +name: awesome_align_with_corsican +date: 2023-09-14 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `awesome_align_with_corsican` is a Multilingual model originally trained by aneuraz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/awesome_align_with_corsican_xx_5.1.1_3.0_1694653050397.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/awesome_align_with_corsican_xx_5.1.1_3.0_1694653050397.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("awesome_align_with_corsican","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("awesome_align_with_corsican", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|awesome_align_with_corsican| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/aneuraz/awesome-align-with-co \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-base_mlm_tweet_en.md b/docs/_posts/ahmedlone127/2023-09-14-base_mlm_tweet_en.md new file mode 100644 index 00000000000000..9196ab59237cd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-base_mlm_tweet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English base_mlm_tweet BertEmbeddings from muhtasham +author: John Snow Labs +name: base_mlm_tweet +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `base_mlm_tweet` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/base_mlm_tweet_en_5.1.1_3.0_1694664760038.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/base_mlm_tweet_en_5.1.1_3.0_1694664760038.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("base_mlm_tweet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("base_mlm_tweet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|base_mlm_tweet| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.6 MB| + +## References + +https://huggingface.co/muhtasham/base-mlm-tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_martin_fierro_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_martin_fierro_en.md new file mode 100644 index 00000000000000..533a0b2d425d02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_martin_fierro_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_adaptation_martin_fierro BertEmbeddings from nanom +author: John Snow Labs +name: bert_adaptation_martin_fierro +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_adaptation_martin_fierro` is a English model originally trained by nanom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_adaptation_martin_fierro_en_5.1.1_3.0_1694675031930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_adaptation_martin_fierro_en_5.1.1_3.0_1694675031930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_adaptation_martin_fierro","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_adaptation_martin_fierro", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_adaptation_martin_fierro| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/nanom/bert_adaptation_martin_fierro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_peppa_pig_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_peppa_pig_en.md new file mode 100644 index 00000000000000..2222bfdf461800 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_peppa_pig_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_adaptation_peppa_pig BertEmbeddings from nanom +author: John Snow Labs +name: bert_adaptation_peppa_pig +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_adaptation_peppa_pig` is a English model originally trained by nanom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_adaptation_peppa_pig_en_5.1.1_3.0_1694675255366.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_adaptation_peppa_pig_en_5.1.1_3.0_1694675255366.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_adaptation_peppa_pig","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_adaptation_peppa_pig", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_adaptation_peppa_pig| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.6 MB| + +## References + +https://huggingface.co/nanom/bert_adaptation_peppa_pig \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_referencias_german_vinos_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_referencias_german_vinos_en.md new file mode 100644 index 00000000000000..d88347a9f4e451 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_referencias_german_vinos_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_adaptation_referencias_german_vinos BertEmbeddings from nanom +author: John Snow Labs +name: bert_adaptation_referencias_german_vinos +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_adaptation_referencias_german_vinos` is a English model originally trained by nanom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_adaptation_referencias_german_vinos_en_5.1.1_3.0_1694676519192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_adaptation_referencias_german_vinos_en_5.1.1_3.0_1694676519192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_adaptation_referencias_german_vinos","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_adaptation_referencias_german_vinos", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_adaptation_referencias_german_vinos| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.7 MB| + +## References + +https://huggingface.co/nanom/bert_adaptation_referencias_de_vinos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_vizwiz_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_vizwiz_en.md new file mode 100644 index 00000000000000..62fe5a06105082 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_adaptation_vizwiz_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_adaptation_vizwiz BertEmbeddings from nanom +author: John Snow Labs +name: bert_adaptation_vizwiz +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_adaptation_vizwiz` is a English model originally trained by nanom. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_adaptation_vizwiz_en_5.1.1_3.0_1694675142841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_adaptation_vizwiz_en_5.1.1_3.0_1694675142841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_adaptation_vizwiz","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_adaptation_vizwiz", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_adaptation_vizwiz| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nanom/bert_adaptation_vizwiz \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_application_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_application_en.md new file mode 100644 index 00000000000000..47bc2e814752ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_application_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_application BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_application +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_application` is a English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_application_en_5.1.1_3.0_1694656899100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_application_en_5.1.1_3.0_1694656899100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_application","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_application", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_application| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-Application \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_arabertv02_finetuned_egyption_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_arabertv02_finetuned_egyption_en.md new file mode 100644 index 00000000000000..bc074294800f3e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_arabertv02_finetuned_egyption_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_arabertv02_finetuned_egyption BertEmbeddings from H-H-T-S +author: John Snow Labs +name: bert_base_arabertv02_finetuned_egyption +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_arabertv02_finetuned_egyption` is a English model originally trained by H-H-T-S. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_finetuned_egyption_en_5.1.1_3.0_1694674744031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_arabertv02_finetuned_egyption_en_5.1.1_3.0_1694674744031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_arabertv02_finetuned_egyption","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_arabertv02_finetuned_egyption", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_arabertv02_finetuned_egyption| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/H-H-T-S/bert-base-arabertv02-finetuned-egyption \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_bangla_finetuned_summarization_dataset_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_bangla_finetuned_summarization_dataset_en.md new file mode 100644 index 00000000000000..0af6e45a148065 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_bangla_finetuned_summarization_dataset_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_bangla_finetuned_summarization_dataset BertEmbeddings from arbitropy +author: John Snow Labs +name: bert_base_bangla_finetuned_summarization_dataset +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_bangla_finetuned_summarization_dataset` is a English model originally trained by arbitropy. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_bangla_finetuned_summarization_dataset_en_5.1.1_3.0_1694666948637.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_bangla_finetuned_summarization_dataset_en_5.1.1_3.0_1694666948637.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_bangla_finetuned_summarization_dataset","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_bangla_finetuned_summarization_dataset", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_bangla_finetuned_summarization_dataset| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.7 MB| + +## References + +https://huggingface.co/arbitropy/bert-base-bangla-finetuned-summarization-dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_bookcorpus_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_bookcorpus_en.md new file mode 100644 index 00000000000000..3f0d44679ba69c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_bookcorpus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_bookcorpus BertEmbeddings from nicholasKluge +author: John Snow Labs +name: bert_base_bookcorpus +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_bookcorpus` is a English model originally trained by nicholasKluge. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_bookcorpus_en_5.1.1_3.0_1694660865879.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_bookcorpus_en_5.1.1_3.0_1694660865879.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_bookcorpus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_bookcorpus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_bookcorpus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.3 MB| + +## References + +https://huggingface.co/nicholasKluge/bert-base-bookcorpus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_conversational_finetuned_wallisian_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_conversational_finetuned_wallisian_en.md new file mode 100644 index 00000000000000..c7bc69f37d0bc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_conversational_finetuned_wallisian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_conversational_finetuned_wallisian BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_conversational_finetuned_wallisian +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_conversational_finetuned_wallisian` is a English model originally trained by btamm12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_conversational_finetuned_wallisian_en_5.1.1_3.0_1694652098523.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_conversational_finetuned_wallisian_en_5.1.1_3.0_1694652098523.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_conversational_finetuned_wallisian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_conversational_finetuned_wallisian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_conversational_finetuned_wallisian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.8 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-conversational-finetuned-wls \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_chemistry_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_chemistry_en.md new file mode 100644 index 00000000000000..06b0d609693eb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_chemistry_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_chemistry BertEmbeddings from Kuaaangwen +author: John Snow Labs +name: bert_base_cased_finetuned_chemistry +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_chemistry` is a English model originally trained by Kuaaangwen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_chemistry_en_5.1.1_3.0_1694662575342.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_chemistry_en_5.1.1_3.0_1694662575342.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_chemistry","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_chemistry", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_chemistry| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Kuaaangwen/bert-base-cased-finetuned-chemistry \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_10ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_10ep_en.md new file mode 100644 index 00000000000000..b1508164ef14fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_10ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_10ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_10ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_wallisian_manual_10ep` is a English model originally trained by btamm12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_10ep_en_5.1.1_3.0_1694673230803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_10ep_en_5.1.1_3.0_1694673230803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_10ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_10ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_10ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-10ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_1ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_1ep_en.md new file mode 100644 index 00000000000000..d9329758bee6d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_1ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_1ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_1ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_cased_finetuned_wallisian_manual_1ep` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_1ep_en_5.1.1_3.0_1694671264388.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_1ep_en_5.1.1_3.0_1694671264388.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_1ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_1ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_1ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-1ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_2ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_2ep_en.md new file mode 100644 index 00000000000000..76cab26440f341 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_2ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_2ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_2ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_2ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_2ep_en_5.1.1_3.0_1694671483581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_2ep_en_5.1.1_3.0_1694671483581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_2ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_2ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_2ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-2ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_3ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_3ep_en.md new file mode 100644 index 00000000000000..c2df8cab1a6d4a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_3ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_3ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_3ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_3ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_3ep_en_5.1.1_3.0_1694671699838.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_3ep_en_5.1.1_3.0_1694671699838.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_3ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_3ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_3ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-3ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_4ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_4ep_en.md new file mode 100644 index 00000000000000..31140ef3ac2ca7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_4ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_4ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_4ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_4ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_4ep_en_5.1.1_3.0_1694671916586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_4ep_en_5.1.1_3.0_1694671916586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_4ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_4ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_4ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-4ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_5ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_5ep_en.md new file mode 100644 index 00000000000000..8b557a038a1c52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_5ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_5ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_5ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_5ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_5ep_en_5.1.1_3.0_1694672136887.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_5ep_en_5.1.1_3.0_1694672136887.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_5ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_5ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_5ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-5ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_6ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_6ep_en.md new file mode 100644 index 00000000000000..5c362febe31be4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_6ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_6ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_6ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_6ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_6ep_en_5.1.1_3.0_1694672358346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_6ep_en_5.1.1_3.0_1694672358346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_6ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_6ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_6ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-6ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_7ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_7ep_en.md new file mode 100644 index 00000000000000..a1c6991c44f6cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_7ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_7ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_7ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_7ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_7ep_en_5.1.1_3.0_1694672574132.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_7ep_en_5.1.1_3.0_1694672574132.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_7ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_7ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_7ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-7ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_8ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_8ep_en.md new file mode 100644 index 00000000000000..6023d76736cc6a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_8ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_8ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_8ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_8ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_8ep_en_5.1.1_3.0_1694672793233.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_8ep_en_5.1.1_3.0_1694672793233.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_8ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_8ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_8ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-8ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_9ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_9ep_en.md new file mode 100644 index 00000000000000..3b7fe505732776 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_manual_9ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_manual_9ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_manual_9ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_manual_9ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_9ep_en_5.1.1_3.0_1694673007935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_manual_9ep_en_5.1.1_3.0_1694673007935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_manual_9ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_manual_9ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_manual_9ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-manual-9ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_10ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_10ep_en.md new file mode 100644 index 00000000000000..8907e33aa75460 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_10ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_10ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_10ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_10ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_10ep_en_5.1.1_3.0_1694671157614.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_10ep_en_5.1.1_3.0_1694671157614.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_10ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_10ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_10ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-10ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_1ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_1ep_en.md new file mode 100644 index 00000000000000..586ef7cf3abcb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_1ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_1ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_1ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_1ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_1ep_en_5.1.1_3.0_1694670183935.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_1ep_en_5.1.1_3.0_1694670183935.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_1ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_1ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_1ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-1ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_2ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_2ep_en.md new file mode 100644 index 00000000000000..9b838a1b0d25b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_2ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_2ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_2ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_2ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_2ep_en_5.1.1_3.0_1694670290798.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_2ep_en_5.1.1_3.0_1694670290798.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_2ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_2ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_2ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-2ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_3ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_3ep_en.md new file mode 100644 index 00000000000000..ce81512a20d135 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_3ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_3ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_3ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_3ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_3ep_en_5.1.1_3.0_1694670399822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_3ep_en_5.1.1_3.0_1694670399822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_3ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_3ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_3ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-3ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_4ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_4ep_en.md new file mode 100644 index 00000000000000..ca7e21df625eee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_4ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_4ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_4ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_4ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_4ep_en_5.1.1_3.0_1694670508413.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_4ep_en_5.1.1_3.0_1694670508413.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_4ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_4ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_4ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-4ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_5ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_5ep_en.md new file mode 100644 index 00000000000000..0a0240cda3e66a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_5ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_5ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_5ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_5ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_5ep_en_5.1.1_3.0_1694670617445.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_5ep_en_5.1.1_3.0_1694670617445.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_5ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_5ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_5ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-5ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_6ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_6ep_en.md new file mode 100644 index 00000000000000..84352c6d887e03 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_6ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_6ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_6ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_6ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_6ep_en_5.1.1_3.0_1694670727672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_6ep_en_5.1.1_3.0_1694670727672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_6ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_6ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_6ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-6ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_7ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_7ep_en.md new file mode 100644 index 00000000000000..6e840d323af79f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_7ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_7ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_7ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_7ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_7ep_en_5.1.1_3.0_1694670836499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_7ep_en_5.1.1_3.0_1694670836499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_7ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_7ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_7ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-7ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_8ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_8ep_en.md new file mode 100644 index 00000000000000..e8996105cf1042 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_8ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_8ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_8ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_8ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_8ep_en_5.1.1_3.0_1694670943658.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_8ep_en_5.1.1_3.0_1694670943658.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_8ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_8ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_8ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-8ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_9ep_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_9ep_en.md new file mode 100644 index 00000000000000..03359371fcbeb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_cased_finetuned_wallisian_whisper_9ep_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_cased_finetuned_wallisian_whisper_9ep BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_cased_finetuned_wallisian_whisper_9ep +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_cased_finetuned_wallisian_whisper_9ep` is an English model originally trained by btamm12.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_9ep_en_5.1.1_3.0_1694671051592.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_cased_finetuned_wallisian_whisper_9ep_en_5.1.1_3.0_1694671051592.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_cased_finetuned_wallisian_whisper_9ep","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_cased_finetuned_wallisian_whisper_9ep", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_cased_finetuned_wallisian_whisper_9ep| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/btamm12/bert-base-cased-finetuned-wls-whisper-9ep \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_code_comments_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_code_comments_en.md new file mode 100644 index 00000000000000..2eaf964fb5fffd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_code_comments_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_code_comments BertEmbeddings from giganticode +author: John Snow Labs +name: bert_base_code_comments +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_code_comments` is an English model originally trained by giganticode. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_code_comments_en_5.1.1_3.0_1694649864357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_code_comments_en_5.1.1_3.0_1694649864357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_code_comments","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_code_comments", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_code_comments| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/giganticode/bert-base-code_comments \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_german_cased_mlm_basque_chemistry_regulation_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_german_cased_mlm_basque_chemistry_regulation_en.md new file mode 100644 index 00000000000000..e95d32ec292b16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_german_cased_mlm_basque_chemistry_regulation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_german_cased_mlm_basque_chemistry_regulation BertEmbeddings from jonas-luehrs +author: John Snow Labs +name: bert_base_german_cased_mlm_basque_chemistry_regulation +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_german_cased_mlm_basque_chemistry_regulation` is an English model originally trained by jonas-luehrs.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_mlm_basque_chemistry_regulation_en_5.1.1_3.0_1694669620686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_cased_mlm_basque_chemistry_regulation_en_5.1.1_3.0_1694669620686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_german_cased_mlm_basque_chemistry_regulation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_cased_mlm_basque_chemistry_regulation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_cased_mlm_basque_chemistry_regulation| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.9 MB| + +## References + +https://huggingface.co/jonas-luehrs/bert-base-german-cased-MLM-eu_chemistry_regulation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_german_europeana_td_cased_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_german_europeana_td_cased_en.md new file mode 100644 index 00000000000000..c9a951f7840630 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_german_europeana_td_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_german_europeana_td_cased BertEmbeddings from dbmdz +author: John Snow Labs +name: bert_base_german_europeana_td_cased +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_german_europeana_td_cased` is an English model originally trained by dbmdz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_german_europeana_td_cased_en_5.1.1_3.0_1694652557402.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_german_europeana_td_cased_en_5.1.1_3.0_1694652557402.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_german_europeana_td_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_german_europeana_td_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_german_europeana_td_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/dbmdz/bert-base-german-europeana-td-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_greek_uncased_v5_finetuned_polylex_malagasy_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_greek_uncased_v5_finetuned_polylex_malagasy_en.md new file mode 100644 index 00000000000000..fbc8517052ed2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_greek_uncased_v5_finetuned_polylex_malagasy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_greek_uncased_v5_finetuned_polylex_malagasy BertEmbeddings from snousias +author: John Snow Labs +name: bert_base_greek_uncased_v5_finetuned_polylex_malagasy +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v5_finetuned_polylex_malagasy` is an English model originally trained by snousias.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v5_finetuned_polylex_malagasy_en_5.1.1_3.0_1694649633484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v5_finetuned_polylex_malagasy_en_5.1.1_3.0_1694649633484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v5_finetuned_polylex_malagasy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v5_finetuned_polylex_malagasy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v5_finetuned_polylex_malagasy| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/snousias/bert-base-greek-uncased-v5-finetuned-polylex-mg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_greek_uncased_v6_finetuned_polylex_malagasy_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_greek_uncased_v6_finetuned_polylex_malagasy_en.md new file mode 100644 index 00000000000000..2d05099788c1f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_greek_uncased_v6_finetuned_polylex_malagasy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_greek_uncased_v6_finetuned_polylex_malagasy BertEmbeddings from polylexmg +author: John Snow Labs +name: bert_base_greek_uncased_v6_finetuned_polylex_malagasy +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_greek_uncased_v6_finetuned_polylex_malagasy` is an English model originally trained by polylexmg. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v6_finetuned_polylex_malagasy_en_5.1.1_3.0_1694649912799.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_greek_uncased_v6_finetuned_polylex_malagasy_en_5.1.1_3.0_1694649912799.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_greek_uncased_v6_finetuned_polylex_malagasy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_greek_uncased_v6_finetuned_polylex_malagasy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_greek_uncased_v6_finetuned_polylex_malagasy| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|421.1 MB| + +## References + +https://huggingface.co/polylexmg/bert-base-greek-uncased-v6-finetuned-polylex-mg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_kor_v1_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_kor_v1_en.md new file mode 100644 index 00000000000000..687a9c8126d399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_kor_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_kor_v1 BertEmbeddings from bongsoo +author: John Snow Labs +name: bert_base_kor_v1 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_kor_v1` is an English model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_kor_v1_en_5.1.1_3.0_1694653204759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_kor_v1_en_5.1.1_3.0_1694653204759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_kor_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_kor_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_kor_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.6 MB| + +## References + +https://huggingface.co/bongsoo/bert-base-kor-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_minipile_128_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_minipile_128_en.md new file mode 100644 index 00000000000000..f2cd21d687e58e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_minipile_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_minipile_128 BertEmbeddings from seba +author: John Snow Labs +name: bert_base_minipile_128 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_minipile_128` is an English model originally trained by seba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_minipile_128_en_5.1.1_3.0_1694666014699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_minipile_128_en_5.1.1_3.0_1694666014699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_minipile_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_minipile_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_minipile_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.6 MB| + +## References + +https://huggingface.co/seba/bert-base-minipile-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_spanish_wwm_cased_finetuned_peppa_pig_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_spanish_wwm_cased_finetuned_peppa_pig_en.md new file mode 100644 index 00000000000000..24a4d3ea8867ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_spanish_wwm_cased_finetuned_peppa_pig_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_wwm_cased_finetuned_peppa_pig BertEmbeddings from guidoivetta +author: John Snow Labs +name: bert_base_spanish_wwm_cased_finetuned_peppa_pig +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_wwm_cased_finetuned_peppa_pig` is an English model originally trained by guidoivetta. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_peppa_pig_en_5.1.1_3.0_1694669728508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_peppa_pig_en_5.1.1_3.0_1694669728508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_finetuned_peppa_pig","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_finetuned_peppa_pig", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_finetuned_peppa_pig| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/guidoivetta/bert-base-spanish-wwm-cased-finetuned-peppa-pig \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish_en.md new file mode 100644 index 00000000000000..3c0e42b22f109e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish BertEmbeddings from guidoivetta +author: John Snow Labs +name: bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish` is an English model originally trained by guidoivetta. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish_en_5.1.1_3.0_1694669839262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish_en_5.1.1_3.0_1694669839262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_spanish_wwm_cased_finetuned_wine_reviews_spanish| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/guidoivetta/bert-base-spanish-wwm-cased-finetuned-wine-reviews_spanish \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_duplicate_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_duplicate_en.md new file mode 100644 index 00000000000000..80597bd7dbab61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_duplicate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_duplicate BertEmbeddings from julien-c +author: John Snow Labs +name: bert_base_uncased_duplicate +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_duplicate` is an English model originally trained by julien-c. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_duplicate_en_5.1.1_3.0_1694665842003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_duplicate_en_5.1.1_3.0_1694665842003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_duplicate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_duplicate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_duplicate| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/julien-c/bert-base-uncased-duplicate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_bert_auto2_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_bert_auto2_en.md new file mode 100644 index 00000000000000..1cc36fd98d0c61 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_bert_auto2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_auto2 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_auto2 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_auto2` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto2_en_5.1.1_3.0_1694674618662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto2_en_5.1.1_3.0_1694674618662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_auto2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_auto2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_auto2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-auto2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_bert_auto3_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_bert_auto3_en.md new file mode 100644 index 00000000000000..1241d7849b3775 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_bert_auto3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_bert_auto3 BertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: bert_base_uncased_finetuned_bert_auto3 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_bert_auto3` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto3_en_5.1.1_3.0_1694674856695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_bert_auto3_en_5.1.1_3.0_1694674856695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_bert_auto3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_bert_auto3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_bert_auto3| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/bert-base-uncased-finetuned-bert-auto3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_char_hangman_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_char_hangman_en.md new file mode 100644 index 00000000000000..16454bec700c98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_char_hangman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_char_hangman BertEmbeddings from bhagasra-saurav +author: John Snow Labs +name: bert_base_uncased_finetuned_char_hangman +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_char_hangman` is an English model originally trained by bhagasra-saurav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_char_hangman_en_5.1.1_3.0_1694659746036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_char_hangman_en_5.1.1_3.0_1694659746036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_char_hangman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_char_hangman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_char_hangman| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/bhagasra-saurav/bert-base-uncased-finetuned-char-hangman \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_en.md new file mode 100644 index 00000000000000..4c56d62315f39d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_wallisian` is an English model originally trained by btamm12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_en_5.1.1_3.0_1694653293253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_en_5.1.1_3.0_1694653293253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_lower_en.md new file mode 100644 index 00000000000000..98fcde4093640e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_wallisian_lower` is an English model originally trained by btamm12. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_lower_en_5.1.1_3.0_1694653735399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_lower_en_5.1.1_3.0_1694653735399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_10ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_10ep_lower_en.md new file mode 100644 index 00000000000000..a4a70f49e4b045 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_10ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_10ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_10ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_10ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_10ep_lower_en_5.1.1_3.0_1694673340561.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_10ep_lower_en_5.1.1_3.0_1694673340561.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_10ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_10ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_10ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-10ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_1ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_1ep_lower_en.md new file mode 100644 index 00000000000000..039e69a18f7a89 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_1ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_1ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_1ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_1ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_1ep_lower_en_5.1.1_3.0_1694671374906.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_1ep_lower_en_5.1.1_3.0_1694671374906.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_1ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_1ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_1ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-1ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_2ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_2ep_lower_en.md new file mode 100644 index 00000000000000..3311fb19c57a28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_2ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_2ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_2ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_2ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_2ep_lower_en_5.1.1_3.0_1694671592012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_2ep_lower_en_5.1.1_3.0_1694671592012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_2ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_2ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_2ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-2ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_3ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_3ep_lower_en.md new file mode 100644 index 00000000000000..2d5fd0bf93e234 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_3ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_3ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_3ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_3ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_3ep_lower_en_5.1.1_3.0_1694671806791.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_3ep_lower_en_5.1.1_3.0_1694671806791.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_3ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_3ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_3ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-3ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_4ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_4ep_lower_en.md new file mode 100644 index 00000000000000..7f501fa3d04407 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_4ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_4ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_4ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_4ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_4ep_lower_en_5.1.1_3.0_1694672025708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_4ep_lower_en_5.1.1_3.0_1694672025708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_4ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_4ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_4ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-4ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_5ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_5ep_lower_en.md new file mode 100644 index 00000000000000..be3bcbd50aaa7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_5ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_5ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_5ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_5ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_5ep_lower_en_5.1.1_3.0_1694672247698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_5ep_lower_en_5.1.1_3.0_1694672247698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_5ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_5ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_5ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-5ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_6ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_6ep_lower_en.md new file mode 100644 index 00000000000000..7e0b0f06d1899d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_6ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_6ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_6ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_6ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_6ep_lower_en_5.1.1_3.0_1694672466472.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_6ep_lower_en_5.1.1_3.0_1694672466472.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_6ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_6ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_6ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-6ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_7ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_7ep_lower_en.md new file mode 100644 index 00000000000000..3c8ebc6681e002 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_7ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_7ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_7ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_7ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_7ep_lower_en_5.1.1_3.0_1694672684834.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_7ep_lower_en_5.1.1_3.0_1694672684834.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_7ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_7ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_7ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-7ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_8ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_8ep_lower_en.md new file mode 100644 index 00000000000000..aaba40ad3784d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_8ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_8ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_8ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_8ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_8ep_lower_en_5.1.1_3.0_1694672900256.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_8ep_lower_en_5.1.1_3.0_1694672900256.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_8ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_8ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_8ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-8ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_9ep_lower_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_9ep_lower_en.md new file mode 100644 index 00000000000000..6a36b8b63f5110 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wallisian_manual_9ep_lower_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wallisian_manual_9ep_lower BertEmbeddings from btamm12 +author: John Snow Labs +name: bert_base_uncased_finetuned_wallisian_manual_9ep_lower +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wallisian_manual_9ep_lower` is a English model originally trained by btamm12. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_9ep_lower_en_5.1.1_3.0_1694673120991.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wallisian_manual_9ep_lower_en_5.1.1_3.0_1694673120991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =BertEmbeddings.pretrained("bert_base_uncased_finetuned_wallisian_manual_9ep_lower","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wallisian_manual_9ep_lower", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wallisian_manual_9ep_lower| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/btamm12/bert-base-uncased-finetuned-wls-manual-9ep-lower \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wikitext_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wikitext_en.md new file mode 100644 index 00000000000000..0c88f303be7b25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_finetuned_wikitext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_wikitext BertEmbeddings from peteryushunli +author: John Snow Labs +name: bert_base_uncased_finetuned_wikitext +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_base_uncased_finetuned_wikitext` is a English model originally trained by peteryushunli. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wikitext_en_5.1.1_3.0_1694667435437.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_wikitext_en_5.1.1_3.0_1694667435437.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_finetuned_wikitext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_finetuned_wikitext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_wikitext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/peteryushunli/bert-base-uncased-finetuned-wikitext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_abhilashawasthi_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_abhilashawasthi_en.md new file mode 100644 index 00000000000000..1163955e9e3162 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_abhilashawasthi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_abhilashawasthi BertEmbeddings from abhilashawasthi +author: John Snow Labs +name: bert_base_uncased_issues_128_abhilashawasthi +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_abhilashawasthi` is an English model originally trained by abhilashawasthi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_abhilashawasthi_en_5.1.1_3.0_1694658897200.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_abhilashawasthi_en_5.1.1_3.0_1694658897200.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_abhilashawasthi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_abhilashawasthi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_abhilashawasthi| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/abhilashawasthi/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_bh8648_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_bh8648_en.md new file mode 100644 index 00000000000000..b462324ad6a751 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_bh8648_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_bh8648 BertEmbeddings from bh8648 +author: John Snow Labs +name: bert_base_uncased_issues_128_bh8648 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_bh8648` is an English model originally trained by bh8648. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_bh8648_en_5.1.1_3.0_1694652857821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_bh8648_en_5.1.1_3.0_1694652857821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_bh8648","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_bh8648", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_bh8648| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/bh8648/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_ckandemir_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_ckandemir_en.md new file mode 100644 index 00000000000000..94305278d80f21 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_ckandemir_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_ckandemir BertEmbeddings from ckandemir +author: John Snow Labs +name: bert_base_uncased_issues_128_ckandemir +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_ckandemir` is an English model originally trained by ckandemir. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_ckandemir_en_5.1.1_3.0_1694674334514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_ckandemir_en_5.1.1_3.0_1694674334514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_ckandemir","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_ckandemir", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_ckandemir| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/ckandemir/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_en.md new file mode 100644 index 00000000000000..3e2162a7a25ab0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128 BertEmbeddings from susnato +author: John Snow Labs +name: bert_base_uncased_issues_128 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128` is an English model originally trained by susnato. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_en_5.1.1_3.0_1694693094585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_en_5.1.1_3.0_1694693094585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/susnato/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_mabrouk_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_mabrouk_en.md new file mode 100644 index 00000000000000..01075aee7ecbd6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_mabrouk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_mabrouk BertEmbeddings from mabrouk +author: John Snow Labs +name: bert_base_uncased_issues_128_mabrouk +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_mabrouk` is an English model originally trained by mabrouk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_mabrouk_en_5.1.1_3.0_1694651316234.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_mabrouk_en_5.1.1_3.0_1694651316234.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_mabrouk","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_mabrouk", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_mabrouk| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/mabrouk/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_reaverlee_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_reaverlee_en.md new file mode 100644 index 00000000000000..f288864d7ffbf1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_reaverlee_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_reaverlee BertEmbeddings from reaverlee +author: John Snow Labs +name: bert_base_uncased_issues_128_reaverlee +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_reaverlee` is an English model originally trained by reaverlee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_reaverlee_en_5.1.1_3.0_1694659370450.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_reaverlee_en_5.1.1_3.0_1694659370450.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_reaverlee","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_reaverlee", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_reaverlee| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/reaverlee/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_veeps_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_veeps_en.md new file mode 100644 index 00000000000000..d44fce6405fd5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_issues_128_veeps_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_issues_128_veeps BertEmbeddings from veeps +author: John Snow Labs +name: bert_base_uncased_issues_128_veeps +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_issues_128_veeps` is an English model originally trained by veeps. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_veeps_en_5.1.1_3.0_1694652544895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_issues_128_veeps_en_5.1.1_3.0_1694652544895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_issues_128_veeps","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_issues_128_veeps", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_issues_128_veeps| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/veeps/bert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_mlm_scirepeval_fos_chemistry_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_mlm_scirepeval_fos_chemistry_en.md new file mode 100644 index 00000000000000..f7b0cea77a16f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_mlm_scirepeval_fos_chemistry_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_mlm_scirepeval_fos_chemistry BertEmbeddings from jonas-luehrs +author: John Snow Labs +name: bert_base_uncased_mlm_scirepeval_fos_chemistry +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mlm_scirepeval_fos_chemistry` is an English model originally trained by jonas-luehrs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mlm_scirepeval_fos_chemistry_en_5.1.1_3.0_1694658556245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mlm_scirepeval_fos_chemistry_en_5.1.1_3.0_1694658556245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_mlm_scirepeval_fos_chemistry","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_mlm_scirepeval_fos_chemistry", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mlm_scirepeval_fos_chemistry| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jonas-luehrs/bert-base-uncased-MLM-scirepeval_fos_chemistry \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_mlp_scirepeval_chemistry_large_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_mlp_scirepeval_chemistry_large_en.md new file mode 100644 index 00000000000000..aa64f8586a81d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_mlp_scirepeval_chemistry_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_mlp_scirepeval_chemistry_large BertEmbeddings from jonas-luehrs +author: John Snow Labs +name: bert_base_uncased_mlp_scirepeval_chemistry_large +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_mlp_scirepeval_chemistry_large` is an English model originally trained by jonas-luehrs.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mlp_scirepeval_chemistry_large_en_5.1.1_3.0_1694663054967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_mlp_scirepeval_chemistry_large_en_5.1.1_3.0_1694663054967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_mlp_scirepeval_chemistry_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_mlp_scirepeval_chemistry_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_mlp_scirepeval_chemistry_large| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jonas-luehrs/bert-base-uncased-MLP-scirepeval-chemistry-LARGE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_narsil_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_narsil_en.md new file mode 100644 index 00000000000000..0112a7f88adebe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_narsil_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_narsil BertEmbeddings from Narsil +author: John Snow Labs +name: bert_base_uncased_narsil +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_narsil` is an English model originally trained by Narsil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_narsil_en_5.1.1_3.0_1694650332859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_narsil_en_5.1.1_3.0_1694650332859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_narsil","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_narsil", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_narsil| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/Narsil/bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_reviews_128_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_reviews_128_en.md new file mode 100644 index 00000000000000..40dc7540f16c97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_uncased_reviews_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_reviews_128 BertEmbeddings from abhilashawasthi +author: John Snow Labs +name: bert_base_uncased_reviews_128 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_reviews_128` is an English model originally trained by abhilashawasthi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_128_en_5.1.1_3.0_1694659261358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_reviews_128_en_5.1.1_3.0_1694659261358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_uncased_reviews_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// NOTE: assumes a Tokenizer stage (not shown) supplies the "token" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_uncased_reviews_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_reviews_128| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/abhilashawasthi/bert-base-uncased-reviews-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_base_wikitext_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_base_wikitext_en.md new file mode 100644 index 00000000000000..cb7988784efdd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_base_wikitext_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_wikitext BertEmbeddings from nicholasKluge +author: John Snow Labs +name: bert_base_wikitext +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_wikitext` is an English model originally trained by nicholasKluge. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_wikitext_en_5.1.1_3.0_1694661280094.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_wikitext_en_5.1.1_3.0_1694661280094.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_base_wikitext","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_base_wikitext", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_wikitext| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.0 MB| + +## References + +https://huggingface.co/nicholasKluge/bert-base-wikitext \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_based_ner_models_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_based_ner_models_en.md new file mode 100644 index 00000000000000..281aeae64de7c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_based_ner_models_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_based_ner_models BertEmbeddings from pragnakalp +author: John Snow Labs +name: bert_based_ner_models +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_based_ner_models` is an English model originally trained by pragnakalp. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_based_ner_models_en_5.1.1_3.0_1694662008683.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_based_ner_models_en_5.1.1_3.0_1694662008683.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_based_ner_models","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_based_ner_models", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_based_ner_models| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/pragnakalp/bert_based_ner_models \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_bgl_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_bgl_en.md new file mode 100644 index 00000000000000..b66b669f0eade3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_bgl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_bgl BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_bgl +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_bgl` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_bgl_en_5.1.1_3.0_1694673772401.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_bgl_en_5.1.1_3.0_1694673772401.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_bgl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_bgl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_bgl| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-BGL \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_csic_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_csic_en.md new file mode 100644 index 00000000000000..05d72f2bc7620f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_csic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_csic BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_csic +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_csic` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_csic_en_5.1.1_3.0_1694673550522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_csic_en_5.1.1_3.0_1694673550522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_csic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_csic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_csic| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-CSIC \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8_en.md new file mode 100644 index 00000000000000..d0d5ea09b7456f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8_en_5.1.1_3.0_1694692978470.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8_en_5.1.1_3.0_1694692978470.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_8| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-8 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9_en.md new file mode 100644 index 00000000000000..3afaddd837f652 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9_en_5.1.1_3.0_1694693264712.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9_en_5.1.1_3.0_1694693264712.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_no_label_40_2nd_test_LR10_8_9| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-9 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10_en.md new file mode 100644 index 00000000000000..cf5335e2fe1041 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10_en_5.1.1_3.0_1694657886347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10_en_5.1.1_3.0_1694657886347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_10| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-10 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11_en.md new file mode 100644 index 00000000000000..cabfb54e78437c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11_en_5.1.1_3.0_1694658960750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11_en_5.1.1_3.0_1694658960750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_11| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-11 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12_en.md new file mode 100644 index 00000000000000..89d91289fa6aae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12_en_5.1.1_3.0_1694660226986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12_en_5.1.1_3.0_1694660226986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_12| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-12 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13_en.md new file mode 100644 index 00000000000000..1f323513727c72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13_en_5.1.1_3.0_1694660823117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13_en_5.1.1_3.0_1694660823117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_13| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-13 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20_en.md new file mode 100644 index 00000000000000..31f98a8bf31143 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20` is an English model originally trained by jojoUla. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20_en_5.1.1_3.0_1694661462547.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20_en_5.1.1_3.0_1694661462547.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_20| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-20 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5_en.md new file mode 100644 index 00000000000000..8b0420e5ba0fee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5_en_5.1.1_3.0_1694654880451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5_en_5.1.1_3.0_1694654880451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_5| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6_en.md new file mode 100644 index 00000000000000..5a4eaa348304a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6_en_5.1.1_3.0_1694655730374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6_en_5.1.1_3.0_1694655730374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_6| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-6 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7_en.md new file mode 100644 index 00000000000000..ca225039f9ad3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7 BertEmbeddings from jojoUla +author: John Snow Labs +name: bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7` is an English model originally trained by jojoUla.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7_en_5.1.1_3.0_1694656705176.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7_en_5.1.1_3.0_1694656705176.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_cased_sigir_support_refute_norwegian_label_40_2nd_test_lr10_8_7| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.2 GB| + +## References + +https://huggingface.co/jojoUla/bert-large-cased-sigir-support-refute-no-label-40-2nd-test-LR10-8-7 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_nordic_pile_1m_steps_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_nordic_pile_1m_steps_en.md new file mode 100644 index 00000000000000..7244215eb4cd7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_nordic_pile_1m_steps_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_nordic_pile_1m_steps BertEmbeddings from timpal0l +author: John Snow Labs +name: bert_large_nordic_pile_1m_steps +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_nordic_pile_1m_steps` is an English model originally trained by timpal0l. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_nordic_pile_1m_steps_en_5.1.1_3.0_1694666363768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_nordic_pile_1m_steps_en_5.1.1_3.0_1694666363768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_nordic_pile_1m_steps","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_nordic_pile_1m_steps", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_nordic_pile_1m_steps| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.4 GB| + +## References + +https://huggingface.co/timpal0l/bert-large-nordic-pile-1M-steps \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_nordic_pile_1m_steps_sv.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_nordic_pile_1m_steps_sv.md new file mode 100644 index 00000000000000..d3b6c68bf52137 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_nordic_pile_1m_steps_sv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Swedish bert_large_nordic_pile_1m_steps BertEmbeddings from AI-Sweden-Models +author: John Snow Labs +name: bert_large_nordic_pile_1m_steps +date: 2023-09-14 +tags: [bert, sv, open_source, fill_mask, onnx] +task: Embeddings +language: sv +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bert_large_nordic_pile_1m_steps` is a Swedish model originally trained by AI-Sweden-Models. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_nordic_pile_1m_steps_sv_5.1.1_3.0_1694666623839.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_nordic_pile_1m_steps_sv_5.1.1_3.0_1694666623839.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_nordic_pile_1m_steps","sv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_nordic_pile_1m_steps", "sv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_nordic_pile_1m_steps| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|sv| +|Size:|1.4 GB| + +## References + +https://huggingface.co/AI-Sweden-Models/bert-large-nordic-pile-1M-steps \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_large_stackoverflow_comments_1m_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_large_stackoverflow_comments_1m_en.md new file mode 100644 index 00000000000000..97a55a6c69aebc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_large_stackoverflow_comments_1m_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_large_stackoverflow_comments_1m BertEmbeddings from giganticode +author: John Snow Labs +name: bert_large_stackoverflow_comments_1m +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_large_stackoverflow_comments_1m` is an English model originally trained by giganticode. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_large_stackoverflow_comments_1m_en_5.1.1_3.0_1694650551787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_large_stackoverflow_comments_1m_en_5.1.1_3.0_1694650551787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_large_stackoverflow_comments_1m","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_large_stackoverflow_comments_1m", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_large_stackoverflow_comments_1m| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/giganticode/bert-large-StackOverflow-comments_1M \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_multilang_finetune_bangla_summarization_dataset_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_multilang_finetune_bangla_summarization_dataset_en.md new file mode 100644 index 00000000000000..bc6fbe813d6aff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_multilang_finetune_bangla_summarization_dataset_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_multilang_finetune_bangla_summarization_dataset BertEmbeddings from arbitropy +author: John Snow Labs +name: bert_multilang_finetune_bangla_summarization_dataset +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_multilang_finetune_bangla_summarization_dataset` is an English model originally trained by arbitropy.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_multilang_finetune_bangla_summarization_dataset_en_5.1.1_3.0_1694667323120.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_multilang_finetune_bangla_summarization_dataset_en_5.1.1_3.0_1694667323120.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_multilang_finetune_bangla_summarization_dataset","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_multilang_finetune_bangla_summarization_dataset", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_multilang_finetune_bangla_summarization_dataset| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/arbitropy/bert-multilang-finetune-bangla-summarization-dataset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_nlp_project_google_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_nlp_project_google_en.md new file mode 100644 index 00000000000000..5ef87ba213725d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_nlp_project_google_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_nlp_project_google BertEmbeddings from jestemleon +author: John Snow Labs +name: bert_nlp_project_google +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_nlp_project_google` is an English model originally trained by jestemleon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_nlp_project_google_en_5.1.1_3.0_1694661280346.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_nlp_project_google_en_5.1.1_3.0_1694661280346.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_nlp_project_google","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_nlp_project_google", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_nlp_project_google| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/jestemleon/bert-nlp-project-google \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_nlp_project_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_nlp_project_imdb_en.md new file mode 100644 index 00000000000000..3cd3ae7bdcc554 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_nlp_project_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_nlp_project_imdb BertEmbeddings from jestemleon +author: John Snow Labs +name: bert_nlp_project_imdb +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_nlp_project_imdb` is an English model originally trained by jestemleon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_nlp_project_imdb_en_5.1.1_3.0_1694659623230.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_nlp_project_imdb_en_5.1.1_3.0_1694659623230.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_nlp_project_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_nlp_project_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_nlp_project_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/jestemleon/bert-nlp-project-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_pkdd_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_pkdd_en.md new file mode 100644 index 00000000000000..618789e42208b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_pkdd_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pkdd BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_pkdd +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pkdd` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pkdd_en_5.1.1_3.0_1694673662056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pkdd_en_5.1.1_3.0_1694673662056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_pkdd","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_pkdd", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pkdd| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-PKDD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_spirit_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_spirit_en.md new file mode 100644 index 00000000000000..67b64e4cedec35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_spirit_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_spirit BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_spirit +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_spirit` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_spirit_en_5.1.1_3.0_1694673992319.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_spirit_en_5.1.1_3.0_1694673992319.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_spirit","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_spirit", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_spirit| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-Spirit \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_system_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_system_en.md new file mode 100644 index 00000000000000..9a861bdd9c3c82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_system_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_system BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_system +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_system` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_system_en_5.1.1_3.0_1694654994124.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_system_en_5.1.1_3.0_1694654994124.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_system","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_system", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_system| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-System \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_thunderbird_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_thunderbird_en.md new file mode 100644 index 00000000000000..895ac1823d2833 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_thunderbird_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_thunderbird BertEmbeddings from EgilKarlsen +author: John Snow Labs +name: bert_thunderbird +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_thunderbird` is an English model originally trained by EgilKarlsen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_thunderbird_en_5.1.1_3.0_1694673882784.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_thunderbird_en_5.1.1_3.0_1694673882784.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_thunderbird","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_thunderbird", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_thunderbird| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/EgilKarlsen/BERT-Thunderbird \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bert_ucb_v1_en.md b/docs/_posts/ahmedlone127/2023-09-14-bert_ucb_v1_en.md new file mode 100644 index 00000000000000..d75034ad344dfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bert_ucb_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_ucb_v1 BertEmbeddings from Diegomejia +author: John Snow Labs +name: bert_ucb_v1 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_ucb_v1` is an English model originally trained by Diegomejia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_ucb_v1_en_5.1.1_3.0_1694664280264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_ucb_v1_en_5.1.1_3.0_1694664280264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bert_ucb_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bert_ucb_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_ucb_v1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Diegomejia/bert-ucb-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bertimbau_pt.md b/docs/_posts/ahmedlone127/2023-09-14-bertimbau_pt.md new file mode 100644 index 00000000000000..6a2709eb243966 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bertimbau_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertimbau BertEmbeddings from tubyneto +author: John Snow Labs +name: bertimbau +date: 2023-09-14 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertimbau` is a Portuguese model originally trained by tubyneto. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertimbau_pt_5.1.1_3.0_1694664877599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertimbau_pt_5.1.1_3.0_1694664877599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertimbau","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertimbau", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertimbau| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|405.9 MB| + +## References + +https://huggingface.co/tubyneto/bertimbau \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bertino_lsg_en.md b/docs/_posts/ahmedlone127/2023-09-14-bertino_lsg_en.md new file mode 100644 index 00000000000000..9b3c412bd78a64 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bertino_lsg_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertino_lsg DistilBertEmbeddings from efederici +author: John Snow Labs +name: bertino_lsg +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertino_lsg` is an English model originally trained by efederici. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertino_lsg_en_5.1.2_3.0_1694735307524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertino_lsg_en_5.1.2_3.0_1694735307524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bertino_lsg","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bertino_lsg", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertino_lsg| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|263.3 MB| + +## References + +https://huggingface.co/efederici/bertino-lsg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bertugues_base_portuguese_cased_pt.md b/docs/_posts/ahmedlone127/2023-09-14-bertugues_base_portuguese_cased_pt.md new file mode 100644 index 00000000000000..28838eb95b7cf8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bertugues_base_portuguese_cased_pt.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Portuguese bertugues_base_portuguese_cased BertEmbeddings from ricardoz +author: John Snow Labs +name: bertugues_base_portuguese_cased +date: 2023-09-14 +tags: [bert, pt, open_source, fill_mask, onnx] +task: Embeddings +language: pt +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`bertugues_base_portuguese_cased` is a Portuguese model originally trained by ricardoz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertugues_base_portuguese_cased_pt_5.1.1_3.0_1694650332853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertugues_base_portuguese_cased_pt_5.1.1_3.0_1694650332853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bertugues_base_portuguese_cased","pt") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bertugues_base_portuguese_cased", "pt") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertugues_base_portuguese_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|pt| +|Size:|408.1 MB| + +## References + +https://huggingface.co/ricardoz/BERTugues-base-portuguese-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bibert_v0.1_en.md b/docs/_posts/ahmedlone127/2023-09-14-bibert_v0.1_en.md new file mode 100644 index 00000000000000..c5d5aa8e4af490 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bibert_v0.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bibert_v0.1 BertEmbeddings from yugen-ok +author: John Snow Labs +name: bibert_v0.1 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bibert_v0.1` is an English model originally trained by yugen-ok. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bibert_v0.1_en_5.1.1_3.0_1694666746036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bibert_v0.1_en_5.1.1_3.0_1694666746036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bibert_v0.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bibert_v0.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bibert_v0.1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/yugen-ok/bibert-v0.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-biobit_it.md b/docs/_posts/ahmedlone127/2023-09-14-biobit_it.md new file mode 100644 index 00000000000000..c13cd6256893c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-biobit_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian biobit BertEmbeddings from IVN-RIN +author: John Snow Labs +name: biobit +date: 2023-09-14 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biobit` is an Italian model originally trained by IVN-RIN. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biobit_it_5.1.1_3.0_1694659298869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biobit_it_5.1.1_3.0_1694659298869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biobit","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biobit", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biobit| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.2 MB| + +## References + +https://huggingface.co/IVN-RIN/bioBIT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-biomedvlp_cxr_bert_general_en.md b/docs/_posts/ahmedlone127/2023-09-14-biomedvlp_cxr_bert_general_en.md new file mode 100644 index 00000000000000..a7f4d323893c69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-biomedvlp_cxr_bert_general_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English biomedvlp_cxr_bert_general BertEmbeddings from microsoft +author: John Snow Labs +name: biomedvlp_cxr_bert_general +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `biomedvlp_cxr_bert_general` is an English model originally trained by microsoft. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/biomedvlp_cxr_bert_general_en_5.1.1_3.0_1694659655941.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/biomedvlp_cxr_bert_general_en_5.1.1_3.0_1694659655941.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("biomedvlp_cxr_bert_general","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("biomedvlp_cxr_bert_general", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|biomedvlp_cxr_bert_general| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|260.2 MB| + +## References + +https://huggingface.co/microsoft/BiomedVLP-CXR-BERT-general \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-bnlp_tokenizer_paraphrase_mlm_bert_900001_en.md b/docs/_posts/ahmedlone127/2023-09-14-bnlp_tokenizer_paraphrase_mlm_bert_900001_en.md new file mode 100644 index 00000000000000..44cf910d36bcc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-bnlp_tokenizer_paraphrase_mlm_bert_900001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bnlp_tokenizer_paraphrase_mlm_bert_900001 BertEmbeddings from arbitropy +author: John Snow Labs +name: bnlp_tokenizer_paraphrase_mlm_bert_900001 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bnlp_tokenizer_paraphrase_mlm_bert_900001` is an English model originally trained by arbitropy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bnlp_tokenizer_paraphrase_mlm_bert_900001_en_5.1.1_3.0_1694662292369.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bnlp_tokenizer_paraphrase_mlm_bert_900001_en_5.1.1_3.0_1694662292369.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("bnlp_tokenizer_paraphrase_mlm_bert_900001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("bnlp_tokenizer_paraphrase_mlm_bert_900001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bnlp_tokenizer_paraphrase_mlm_bert_900001| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/arbitropy/bnlp-tokenizer-paraphrase-mlm-bert-900001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-burmese_bert_model_en.md b/docs/_posts/ahmedlone127/2023-09-14-burmese_bert_model_en.md new file mode 100644 index 00000000000000..c6c1ed507d66d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-burmese_bert_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_bert_model BertEmbeddings from billfass +author: John Snow Labs +name: burmese_bert_model +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_bert_model` is an English model originally trained by billfass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_bert_model_en_5.1.1_3.0_1694675726360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_bert_model_en_5.1.1_3.0_1694675726360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("burmese_bert_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("burmese_bert_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_bert_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|663.2 MB| + +## References + +https://huggingface.co/billfass/my_bert_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-clinicaltrialbiobert_en.md b/docs/_posts/ahmedlone127/2023-09-14-clinicaltrialbiobert_en.md new file mode 100644 index 00000000000000..0ec0ca24b6d969 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-clinicaltrialbiobert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicaltrialbiobert BertEmbeddings from domenicrosati +author: John Snow Labs +name: clinicaltrialbiobert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicaltrialbiobert` is an English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicaltrialbiobert_en_5.1.1_3.0_1694660034709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicaltrialbiobert_en_5.1.1_3.0_1694660034709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("clinicaltrialbiobert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("clinicaltrialbiobert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicaltrialbiobert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.8 MB| + +## References + +https://huggingface.co/domenicrosati/ClinicalTrialBioBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-closure_system_door_inne_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-14-closure_system_door_inne_bert_base_uncased_en.md new file mode 100644 index 00000000000000..dd9a2cd0616120 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-closure_system_door_inne_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English closure_system_door_inne_bert_base_uncased BertEmbeddings from Davincilee +author: John Snow Labs +name: closure_system_door_inne_bert_base_uncased +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `closure_system_door_inne_bert_base_uncased` is an English model originally trained by Davincilee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/closure_system_door_inne_bert_base_uncased_en_5.1.1_3.0_1694654562553.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/closure_system_door_inne_bert_base_uncased_en_5.1.1_3.0_1694654562553.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("closure_system_door_inne_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("closure_system_door_inne_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|closure_system_door_inne_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Davincilee/closure_system_door_inne-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-coronasentana_en.md b/docs/_posts/ahmedlone127/2023-09-14-coronasentana_en.md new file mode 100644 index 00000000000000..870dbc60f9b555 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-coronasentana_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English coronasentana BertEmbeddings from Peed911 +author: John Snow Labs +name: coronasentana +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `coronasentana` is an English model originally trained by Peed911. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/coronasentana_en_5.1.1_3.0_1694668810910.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/coronasentana_en_5.1.1_3.0_1694668810910.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("coronasentana","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("coronasentana", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|coronasentana| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/Peed911/CoronaSentAna \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-customer_data_tuned_trial_1_en.md b/docs/_posts/ahmedlone127/2023-09-14-customer_data_tuned_trial_1_en.md new file mode 100644 index 00000000000000..a9b0dfa807b05a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-customer_data_tuned_trial_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English customer_data_tuned_trial_1 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: customer_data_tuned_trial_1 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `customer_data_tuned_trial_1` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/customer_data_tuned_trial_1_en_5.1.2_3.0_1694734877512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/customer_data_tuned_trial_1_en_5.1.2_3.0_1694734877512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("customer_data_tuned_trial_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("customer_data_tuned_trial_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|customer_data_tuned_trial_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/customer_data_tuned_trial_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dbert_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-14-dbert_finetuned_en.md new file mode 100644 index 00000000000000..93876e4433068d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dbert_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dbert_finetuned DistilBertEmbeddings from ksabeh +author: John Snow Labs +name: dbert_finetuned +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbert_finetuned` is an English model originally trained by ksabeh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_finetuned_en_5.1.2_3.0_1694735275130.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_finetuned_en_5.1.2_3.0_1694735275130.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("dbert_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("dbert_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ksabeh/dbert-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-debiasing_pre_trained_contextualised_embeddings_distil_bert_en.md b/docs/_posts/ahmedlone127/2023-09-14-debiasing_pre_trained_contextualised_embeddings_distil_bert_en.md new file mode 100644 index 00000000000000..083d8a1ca717b2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-debiasing_pre_trained_contextualised_embeddings_distil_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English debiasing_pre_trained_contextualised_embeddings_distil_bert DistilBertEmbeddings from Daniel-Saeedi +author: John Snow Labs +name: debiasing_pre_trained_contextualised_embeddings_distil_bert +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `debiasing_pre_trained_contextualised_embeddings_distil_bert` is an English model originally trained by Daniel-Saeedi. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/debiasing_pre_trained_contextualised_embeddings_distil_bert_en_5.1.2_3.0_1694734406797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/debiasing_pre_trained_contextualised_embeddings_distil_bert_en_5.1.2_3.0_1694734406797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("debiasing_pre_trained_contextualised_embeddings_distil_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("debiasing_pre_trained_contextualised_embeddings_distil_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|debiasing_pre_trained_contextualised_embeddings_distil_bert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Daniel-Saeedi/debiasing_pre-trained_contextualised_embeddings_distil_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dictabert_he.md b/docs/_posts/ahmedlone127/2023-09-14-dictabert_he.md new file mode 100644 index 00000000000000..832d40238defcf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dictabert_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew dictabert BertEmbeddings from dicta-il +author: John Snow Labs +name: dictabert +date: 2023-09-14 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dictabert` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictabert_he_5.1.1_3.0_1694668081428.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictabert_he_5.1.1_3.0_1694668081428.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dictabert","he") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dictabert", "he") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictabert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|440.2 MB| + +## References + +https://huggingface.co/dicta-il/dictabert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dictabert_morph_he.md b/docs/_posts/ahmedlone127/2023-09-14-dictabert_morph_he.md new file mode 100644 index 00000000000000..c9fda4c69f2b7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dictabert_morph_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew dictabert_morph BertEmbeddings from dicta-il +author: John Snow Labs +name: dictabert_morph +date: 2023-09-14 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dictabert_morph` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictabert_morph_he_5.1.1_3.0_1694668253883.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictabert_morph_he_5.1.1_3.0_1694668253883.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dictabert_morph","he") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dictabert_morph", "he") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictabert_morph| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|639.8 MB| + +## References + +https://huggingface.co/dicta-il/dictabert-morph \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dictabert_seg_he.md b/docs/_posts/ahmedlone127/2023-09-14-dictabert_seg_he.md new file mode 100644 index 00000000000000..1d928a59e56a6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dictabert_seg_he.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Hebrew dictabert_seg BertEmbeddings from dicta-il +author: John Snow Labs +name: dictabert_seg +date: 2023-09-14 +tags: [bert, he, open_source, fill_mask, onnx] +task: Embeddings +language: he +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`dictabert_seg` is a Hebrew model originally trained by dicta-il. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dictabert_seg_he_5.1.1_3.0_1694667839991.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dictabert_seg_he_5.1.1_3.0_1694667839991.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dictabert_seg","he") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dictabert_seg", "he") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dictabert_seg| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|he| +|Size:|657.5 MB| + +## References + +https://huggingface.co/dicta-il/dictabert-seg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_25lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_25lang_cased_xx.md new file mode 100644 index 00000000000000..28b38846c3cf87 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_25lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual distilbert_base_25lang_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_25lang_cased +date: 2023-09-14 +tags: [distilbert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_25lang_cased` is a Multilingual model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_25lang_cased_xx_5.1.2_3.0_1694735571253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_25lang_cased_xx_5.1.2_3.0_1694735571253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_25lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_25lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_25lang_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|405.6 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-25lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_german_cased_de.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_german_cased_de.md new file mode 100644 index 00000000000000..d7e9c4f2aa5414 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_german_cased_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German distilbert_base_german_cased DistilBertEmbeddings from huggingface +author: John Snow Labs +name: distilbert_base_german_cased +date: 2023-09-14 +tags: [distilbert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_german_cased` is a German model originally trained by huggingface. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_de_5.1.2_3.0_1694734364944.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_de_5.1.2_3.0_1694734364944.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_german_cased","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_german_cased", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_german_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|250.3 MB| + +## References + +https://huggingface.co/distilbert-base-german-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_german_cased_finetuned_amazon_reviews_de.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_german_cased_finetuned_amazon_reviews_de.md new file mode 100644 index 00000000000000..d06a88ad176f8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_german_cased_finetuned_amazon_reviews_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German distilbert_base_german_cased_finetuned_amazon_reviews DistilBertEmbeddings from mariav +author: John Snow Labs +name: distilbert_base_german_cased_finetuned_amazon_reviews +date: 2023-09-14 +tags: [distilbert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_german_cased_finetuned_amazon_reviews` is a German model originally trained by mariav. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_finetuned_amazon_reviews_de_5.1.2_3.0_1694735975936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_finetuned_amazon_reviews_de_5.1.2_3.0_1694735975936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_german_cased_finetuned_amazon_reviews","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_german_cased_finetuned_amazon_reviews", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_german_cased_finetuned_amazon_reviews| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|250.3 MB| + +## References + +https://huggingface.co/mariav/distilbert-base-german-cased-finetuned-amazon-reviews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_multilingual_cased_finetuned_kintweetsE_xx.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_multilingual_cased_finetuned_kintweetsE_xx.md new file mode 100644 index 00000000000000..020608722279aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_multilingual_cased_finetuned_kintweetsE_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_finetuned_kintweetsE DistilBertEmbeddings from RogerB +author: John Snow Labs +name: distilbert_base_multilingual_cased_finetuned_kintweetsE +date: 2023-09-14 +tags: [distilbert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_finetuned_kintweetsE` is a Multilingual model originally trained by RogerB. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_kintweetsE_xx_5.1.2_3.0_1694735872200.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_kintweetsE_xx_5.1.2_3.0_1694735872200.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_finetuned_kintweetsE","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_multilingual_cased_finetuned_kintweetsE", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_finetuned_kintweetsE| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/RogerB/distilbert-base-multilingual-cased-finetuned-kintweetsE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_spanish_uncased_es.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_spanish_uncased_es.md new file mode 100644 index 00000000000000..c0b42334bbb1f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_spanish_uncased_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish distilbert_base_spanish_uncased DistilBertEmbeddings from dccuchile +author: John Snow Labs +name: distilbert_base_spanish_uncased +date: 2023-09-14 +tags: [distilbert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_spanish_uncased` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_es_5.1.2_3.0_1694734760876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_es_5.1.2_3.0_1694734760876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_spanish_uncased", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|250.2 MB| + +## References + +https://huggingface.co/dccuchile/distilbert-base-spanish-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_aisera_texts_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_aisera_texts_en.md new file mode 100644 index 00000000000000..f6b77679f58191 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_aisera_texts_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_aisera_texts DistilBertEmbeddings from Theimisa +author: John Snow Labs +name: distilbert_base_uncased_aisera_texts +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_aisera_texts` is an English model originally trained by Theimisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_en_5.1.2_3.0_1694735304909.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_en_5.1.2_3.0_1694735304909.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_aisera_texts","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_aisera_texts", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_aisera_texts| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Theimisa/distilbert-base-uncased-aisera_texts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_aisera_texts_v3_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_aisera_texts_v3_en.md new file mode 100644 index 00000000000000..b1058d44cb37fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_aisera_texts_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_aisera_texts_v3 DistilBertEmbeddings from Theimisa +author: John Snow Labs +name: distilbert_base_uncased_aisera_texts_v3 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_aisera_texts_v3` is an English model originally trained by Theimisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_v3_en_5.1.2_3.0_1694735601782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_v3_en_5.1.2_3.0_1694735601782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_aisera_texts_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_aisera_texts_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_aisera_texts_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Theimisa/distilbert-base-uncased-aisera_texts-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned2_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned2_imdb_en.md new file mode 100644 index 00000000000000..ebd1686949d1eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned2_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned2_imdb DistilBertEmbeddings from Ghost1 +author: John Snow Labs +name: distilbert_base_uncased_finetuned2_imdb +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned2_imdb` is an English model originally trained by Ghost1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned2_imdb_en_5.1.2_3.0_1694734565679.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned2_imdb_en_5.1.2_3.0_1694734565679.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned2_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned2_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned2_imdb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ghost1/distilbert-base-uncased-finetuned2-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_CT_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_CT_en.md new file mode 100644 index 00000000000000..f0e25573804f94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_CT_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_CT DistilBertEmbeddings from anthonyyazdani +author: John Snow Labs +name: distilbert_base_uncased_finetuned_CT +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_CT` is an English model originally trained by anthonyyazdani. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_CT_en_5.1.2_3.0_1694735035198.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_CT_en_5.1.2_3.0_1694735035198.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_CT","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_CT", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_CT| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/anthonyyazdani/distilbert-base-uncased-finetuned-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_cvent_2022_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_cvent_2022_en.md new file mode 100644 index 00000000000000..aa0915433f36f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_cvent_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cvent_2022 DistilBertEmbeddings from vives +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cvent_2022 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_cvent_2022` is an English model originally trained by vives. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cvent_2022_en_5.1.2_3.0_1694735949691.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cvent_2022_en_5.1.2_3.0_1694735949691.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_cvent_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_cvent_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cvent_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vives/distilbert-base-uncased-finetuned-cvent-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_domain_adaptation_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_domain_adaptation_en.md new file mode 100644 index 00000000000000..b255e71e1dff7c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_domain_adaptation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_domain_adaptation DistilBertEmbeddings from algiraldohe +author: John Snow Labs +name: distilbert_base_uncased_finetuned_domain_adaptation +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_domain_adaptation` is an English model originally trained by algiraldohe. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_domain_adaptation_en_5.1.2_3.0_1694734723970.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_domain_adaptation_en_5.1.2_3.0_1694734723970.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_domain_adaptation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_domain_adaptation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_domain_adaptation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/algiraldohe/distilbert-base-uncased-finetuned-domain-adaptation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_accelerate_en.md new file mode 100644 index 00000000000000..2c740e9ca86528 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate DistilBertEmbeddings from liquannan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate` is an English model originally trained by liquannan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_en_5.1.2_3.0_1694734614129.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_en_5.1.2_3.0_1694734614129.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/liquannan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..e02be7a94464bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb DistilBertEmbeddings from sabby +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb` is an English model originally trained by sabby. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_en_5.1.2_3.0_1694734499155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_en_5.1.2_3.0_1694734499155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sabby/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_whole_word_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_whole_word_en.md new file mode 100644 index 00000000000000..7db36d8f23f0b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_imdb_whole_word_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_whole_word DistilBertEmbeddings from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_whole_word +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_whole_word` is an English model originally trained by PhysHunter. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_whole_word_en_5.1.2_3.0_1694735407983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_whole_word_en_5.1.2_3.0_1694735407983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_whole_word","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_whole_word", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_whole_word| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-imdb-whole-word \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_kintweetsE_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_kintweetsE_en.md new file mode 100644 index 00000000000000..f39ccf9f0cf8be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_kintweetsE_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_kintweetsE DistilBertEmbeddings from RogerB +author: John Snow Labs +name: distilbert_base_uncased_finetuned_kintweetsE +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_kintweetsE` is an English model originally trained by RogerB. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_kintweetsE_en_5.1.2_3.0_1694735711522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_kintweetsE_en_5.1.2_3.0_1694735711522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_kintweetsE","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_kintweetsE", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_kintweetsE| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RogerB/distilbert-base-uncased-finetuned-kintweetsE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_mlm_1_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_mlm_1_en.md new file mode 100644 index 00000000000000..cfc0deb7ec7c34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_mlm_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_mlm_1 DistilBertEmbeddings from aarroonn22 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_mlm_1 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_mlm_1` is an English model originally trained by aarroonn22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_1_en_5.1.2_3.0_1694735063576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_1_en_5.1.2_3.0_1694735063576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_mlm_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_mlm_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_mlm_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/aarroonn22/distilbert-base-uncased-finetuned-mlm-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_mlm_2_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_mlm_2_en.md new file mode 100644 index 00000000000000..51cc5f5702115c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_mlm_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_mlm_2 DistilBertEmbeddings from aarroonn22 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_mlm_2 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_mlm_2` is an English model originally trained by aarroonn22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_2_en_5.1.2_3.0_1694734954635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_2_en_5.1.2_3.0_1694734954635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_mlm_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_mlm_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_mlm_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/aarroonn22/distilbert-base-uncased-finetuned-mlm-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_nitro_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_nitro_en.md new file mode 100644 index 00000000000000..ac84a61ffad808 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_nitro_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_nitro DistilBertEmbeddings from dieexbr +author: John Snow Labs +name: distilbert_base_uncased_finetuned_nitro +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_nitro` is an English model originally trained by dieexbr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nitro_en_5.1.2_3.0_1694735152823.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nitro_en_5.1.2_3.0_1694735152823.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_nitro","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_nitro", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_nitro| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dieexbr/distilbert-base-uncased-finetuned-nitro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_recipe_accelerate_1_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_recipe_accelerate_1_en.md new file mode 100644 index 00000000000000..4762deca6f5c34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_recipe_accelerate_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_recipe_accelerate_1 DistilBertEmbeddings from CennetOguz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_recipe_accelerate_1 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_recipe_accelerate_1` is an English model originally trained by CennetOguz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_1_en_5.1.2_3.0_1694734929488.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_1_en_5.1.2_3.0_1694734929488.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_recipe_accelerate_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_recipe_accelerate_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_recipe_accelerate_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/CennetOguz/distilbert-base-uncased-finetuned-recipe-accelerate-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_recipe_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_recipe_accelerate_en.md new file mode 100644 index 00000000000000..6413138f4702d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_recipe_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_recipe_accelerate DistilBertEmbeddings from CennetOguz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_recipe_accelerate +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_recipe_accelerate` is an English model originally trained by CennetOguz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_en_5.1.2_3.0_1694735054267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_en_5.1.2_3.0_1694735054267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_recipe_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_recipe_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_recipe_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/CennetOguz/distilbert-base-uncased-finetuned-recipe-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_speeches_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_speeches_en.md new file mode 100644 index 00000000000000..dde8b49ff246fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_speeches_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_speeches DistilBertEmbeddings from peterday +author: John Snow Labs +name: distilbert_base_uncased_finetuned_speeches +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_speeches` is an English model originally trained by peterday. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_speeches_en_5.1.2_3.0_1694734918396.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_speeches_en_5.1.2_3.0_1694734918396.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_speeches","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_speeches", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_speeches| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peterday/distilbert-base-uncased-finetuned-speeches \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_squad_d5716d28_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_squad_d5716d28_en.md new file mode 100644 index 00000000000000..a1389289293835 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_finetuned_squad_d5716d28_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28 DistilBertEmbeddings from ysugawa +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28` is an English model originally trained by ysugawa. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_en_5.1.2_3.0_1694734604331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_en_5.1.2_3.0_1694734604331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ysugawa/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_linkedin_domain_adaptation_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_linkedin_domain_adaptation_en.md new file mode 100644 index 00000000000000..bd21c33cb2c48e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_base_uncased_linkedin_domain_adaptation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_linkedin_domain_adaptation DistilBertEmbeddings from algiraldohe +author: John Snow Labs +name: distilbert_base_uncased_linkedin_domain_adaptation +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_linkedin_domain_adaptation` is an English model originally trained by algiraldohe. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_linkedin_domain_adaptation_en_5.1.2_3.0_1694735170425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_linkedin_domain_adaptation_en_5.1.2_3.0_1694735170425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_linkedin_domain_adaptation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_linkedin_domain_adaptation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_linkedin_domain_adaptation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/algiraldohe/distilbert-base-uncased-linkedin-domain-adaptation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_embeddings_clinical_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_embeddings_clinical_en.md new file mode 100644 index 00000000000000..a99e07c322312b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_embeddings_clinical_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English Bert Embeddings model (from nlpie) +author: John Snow Labs +name: distilbert_embeddings_clinical +date: 2023-09-14 +tags: [open_source, distilbert, distilbert_embeddings, distilbertformaskedlm, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForMaskedLM model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical-distilbert` is an English model originally trained by `nlpie`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_clinical_en_5.1.2_3.0_1694735741619.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_clinical_en_5.1.2_3.0_1694735741619.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_clinical","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I love Spark-NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_clinical","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I love Spark-NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_clinical| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| +|Case sensitive:|false| + +## References + +References + +https://huggingface.co/nlpie/clinical-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_fa_zwnj_base_MLM_pquad_2_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_fa_zwnj_base_MLM_pquad_2_en.md new file mode 100644 index 00000000000000..1031457c93e365 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_fa_zwnj_base_MLM_pquad_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_fa_zwnj_base_MLM_pquad_2 DistilBertEmbeddings from Gholamreza +author: John Snow Labs +name: distilbert_fa_zwnj_base_MLM_pquad_2 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_fa_zwnj_base_MLM_pquad_2` is an English model originally trained by Gholamreza. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_fa_zwnj_base_MLM_pquad_2_en_5.1.2_3.0_1694734812480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_fa_zwnj_base_MLM_pquad_2_en_5.1.2_3.0_1694734812480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_fa_zwnj_base_MLM_pquad_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_fa_zwnj_base_MLM_pquad_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_fa_zwnj_base_MLM_pquad_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|282.0 MB| + +## References + +https://huggingface.co/Gholamreza/distilbert-fa-zwnj-base-MLM-pquad_2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_fa_zwnj_base_MLM_pquad_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_fa_zwnj_base_MLM_pquad_en.md new file mode 100644 index 00000000000000..1f8209075e4924 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_fa_zwnj_base_MLM_pquad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_fa_zwnj_base_MLM_pquad DistilBertEmbeddings from Gholamreza +author: John Snow Labs +name: distilbert_fa_zwnj_base_MLM_pquad +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_fa_zwnj_base_MLM_pquad` is an English model originally trained by Gholamreza. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_fa_zwnj_base_MLM_pquad_en_5.1.2_3.0_1694734709513.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_fa_zwnj_base_MLM_pquad_en_5.1.2_3.0_1694734709513.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_fa_zwnj_base_MLM_pquad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_fa_zwnj_base_MLM_pquad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_fa_zwnj_base_MLM_pquad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|281.8 MB| + +## References + +https://huggingface.co/Gholamreza/distilbert-fa-zwnj-base-MLM-pquad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distilbert_v1_en.md b/docs/_posts/ahmedlone127/2023-09-14-distilbert_v1_en.md new file mode 100644 index 00000000000000..1b4e103da6b3d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distilbert_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_v1 DistilBertEmbeddings from Amirosein +author: John Snow Labs +name: distilbert_v1 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_v1` is an English model originally trained by Amirosein. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_v1_en_5.1.2_3.0_1694734658336.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_v1_en_5.1.2_3.0_1694734658336.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|276.0 MB| + +## References + +https://huggingface.co/Amirosein/distilbert_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-distillbert_base_spanish_uncased_finetuned_spanish_corpus_en.md b/docs/_posts/ahmedlone127/2023-09-14-distillbert_base_spanish_uncased_finetuned_spanish_corpus_en.md new file mode 100644 index 00000000000000..45e1a309454f20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-distillbert_base_spanish_uncased_finetuned_spanish_corpus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distillbert_base_spanish_uncased_finetuned_spanish_corpus DistilBertEmbeddings from franfram +author: John Snow Labs +name: distillbert_base_spanish_uncased_finetuned_spanish_corpus +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distillbert_base_spanish_uncased_finetuned_spanish_corpus` is an English model originally trained by franfram.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_base_spanish_uncased_finetuned_spanish_corpus_en_5.1.2_3.0_1694734373857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_base_spanish_uncased_finetuned_spanish_corpus_en_5.1.2_3.0_1694734373857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distillbert_base_spanish_uncased_finetuned_spanish_corpus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distillbert_base_spanish_uncased_finetuned_spanish_corpus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_base_spanish_uncased_finetuned_spanish_corpus| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/franfram/distillbert-base-spanish-uncased-finetuned-spanish-corpus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-domain_adapted_arbert_goudma_bert_en.md b/docs/_posts/ahmedlone127/2023-09-14-domain_adapted_arbert_goudma_bert_en.md new file mode 100644 index 00000000000000..e5a0661b51bbf4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-domain_adapted_arbert_goudma_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English domain_adapted_arbert_goudma_bert BertEmbeddings from YassineToughrai +author: John Snow Labs +name: domain_adapted_arbert_goudma_bert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `domain_adapted_arbert_goudma_bert` is an English model originally trained by YassineToughrai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/domain_adapted_arbert_goudma_bert_en_5.1.1_3.0_1694654039836.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/domain_adapted_arbert_goudma_bert_en_5.1.1_3.0_1694654039836.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("domain_adapted_arbert_goudma_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("domain_adapted_arbert_goudma_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|domain_adapted_arbert_goudma_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|605.0 MB| + +## References + +https://huggingface.co/YassineToughrai/Domain_adapted_ARBERT_GOUDMA_BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-door_inner_with_sa_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-14-door_inner_with_sa_bert_base_uncased_en.md new file mode 100644 index 00000000000000..e2e74acf1e1674 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-door_inner_with_sa_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English door_inner_with_sa_bert_base_uncased BertEmbeddings from Davincilee +author: John Snow Labs +name: door_inner_with_sa_bert_base_uncased +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `door_inner_with_sa_bert_base_uncased` is an English model originally trained by Davincilee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/door_inner_with_sa_bert_base_uncased_en_5.1.1_3.0_1694656043395.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/door_inner_with_sa_bert_base_uncased_en_5.1.1_3.0_1694656043395.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("door_inner_with_sa_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("door_inner_with_sa_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|door_inner_with_sa_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/Davincilee/door_inner_with_SA-bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dummy_model_linbo_en.md b/docs/_posts/ahmedlone127/2023-09-14-dummy_model_linbo_en.md new file mode 100644 index 00000000000000..7f5458cb5f124f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dummy_model_linbo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_linbo BertEmbeddings from Linbo +author: John Snow Labs +name: dummy_model_linbo +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_linbo` is an English model originally trained by Linbo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_linbo_en_5.1.1_3.0_1694649822787.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_linbo_en_5.1.1_3.0_1694649822787.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_linbo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_linbo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_linbo| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Linbo/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dummy_model_sasikarn_en.md b/docs/_posts/ahmedlone127/2023-09-14-dummy_model_sasikarn_en.md new file mode 100644 index 00000000000000..358517d641bf8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dummy_model_sasikarn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model_sasikarn BertEmbeddings from Sasikarn +author: John Snow Labs +name: dummy_model_sasikarn +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model_sasikarn` is an English model originally trained by Sasikarn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_sasikarn_en_5.1.1_3.0_1694675578031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_sasikarn_en_5.1.1_3.0_1694675578031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dummy_model_sasikarn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dummy_model_sasikarn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model_sasikarn| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/Sasikarn/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-dzarabert_ar.md b/docs/_posts/ahmedlone127/2023-09-14-dzarabert_ar.md new file mode 100644 index 00000000000000..4b1abced8e95db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-dzarabert_ar.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Arabic dzarabert BertEmbeddings from Sifal +author: John Snow Labs +name: dzarabert +date: 2023-09-14 +tags: [bert, ar, open_source, fill_mask, onnx] +task: Embeddings +language: ar +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dzarabert` is an Arabic model originally trained by Sifal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dzarabert_ar_5.1.1_3.0_1694657627709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dzarabert_ar_5.1.1_3.0_1694657627709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("dzarabert","ar") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("dzarabert", "ar") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dzarabert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ar| +|Size:|418.8 MB| + +## References + +https://huggingface.co/Sifal/dzarabert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_mberttok_en.md b/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_mberttok_en.md new file mode 100644 index 00000000000000..fd76997befd39f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_mberttok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fi_mbertmodel_mberttok BertEmbeddings from hgiyt +author: John Snow Labs +name: fi_mbertmodel_mberttok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fi_mbertmodel_mberttok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fi_mbertmodel_mberttok_en_5.1.1_3.0_1694693637004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fi_mbertmodel_mberttok_en_5.1.1_3.0_1694693637004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fi_mbertmodel_mberttok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fi_mbertmodel_mberttok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fi_mbertmodel_mberttok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|495.8 MB| + +## References + +https://huggingface.co/hgiyt/fi-mbertmodel-mberttok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_monotok_adapter_en.md b/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_monotok_adapter_en.md new file mode 100644 index 00000000000000..240a390a9027bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_monotok_adapter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fi_mbertmodel_monotok_adapter BertEmbeddings from hgiyt +author: John Snow Labs +name: fi_mbertmodel_monotok_adapter +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fi_mbertmodel_monotok_adapter` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fi_mbertmodel_monotok_adapter_en_5.1.1_3.0_1694693799221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fi_mbertmodel_monotok_adapter_en_5.1.1_3.0_1694693799221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fi_mbertmodel_monotok_adapter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fi_mbertmodel_monotok_adapter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fi_mbertmodel_monotok_adapter| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.9 MB| + +## References + +https://huggingface.co/hgiyt/fi-mbertmodel-monotok-adapter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_monotok_en.md b/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_monotok_en.md new file mode 100644 index 00000000000000..44d484cfba9eac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-fi_mbertmodel_monotok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fi_mbertmodel_monotok BertEmbeddings from hgiyt +author: John Snow Labs +name: fi_mbertmodel_monotok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fi_mbertmodel_monotok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fi_mbertmodel_monotok_en_5.1.1_3.0_1694693956177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fi_mbertmodel_monotok_en_5.1.1_3.0_1694693956177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fi_mbertmodel_monotok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fi_mbertmodel_monotok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fi_mbertmodel_monotok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.8 MB| + +## References + +https://huggingface.co/hgiyt/fi-mbertmodel-monotok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-fi_monomodel_mberttok_en.md b/docs/_posts/ahmedlone127/2023-09-14-fi_monomodel_mberttok_en.md new file mode 100644 index 00000000000000..b17ee8b1448d47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-fi_monomodel_mberttok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fi_monomodel_mberttok BertEmbeddings from hgiyt +author: John Snow Labs +name: fi_monomodel_mberttok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fi_monomodel_mberttok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fi_monomodel_mberttok_en_5.1.1_3.0_1694694133556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fi_monomodel_mberttok_en_5.1.1_3.0_1694694133556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fi_monomodel_mberttok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fi_monomodel_mberttok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fi_monomodel_mberttok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|495.0 MB| + +## References + +https://huggingface.co/hgiyt/fi-monomodel-mberttok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-fi_monomodel_monotok_en.md b/docs/_posts/ahmedlone127/2023-09-14-fi_monomodel_monotok_en.md new file mode 100644 index 00000000000000..07d9a6c53cd28e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-fi_monomodel_monotok_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fi_monomodel_monotok BertEmbeddings from hgiyt +author: John Snow Labs +name: fi_monomodel_monotok +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fi_monomodel_monotok` is an English model originally trained by hgiyt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fi_monomodel_monotok_en_5.1.1_3.0_1694694310500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fi_monomodel_monotok_en_5.1.1_3.0_1694694310500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("fi_monomodel_monotok","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("fi_monomodel_monotok", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fi_monomodel_monotok| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.3 MB| + +## References + +https://huggingface.co/hgiyt/fi-monomodel-monotok \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-first_try_4_en.md b/docs/_posts/ahmedlone127/2023-09-14-first_try_4_en.md new file mode 100644 index 00000000000000..7eec5d6f9072f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-first_try_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English first_try_4 DistilBertEmbeddings from disanda +author: John Snow Labs +name: first_try_4 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `first_try_4` is an English model originally trained by disanda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_try_4_en_5.1.2_3.0_1694735581056.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_try_4_en_5.1.2_3.0_1694735581056.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("first_try_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("first_try_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_try_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/disanda/first_try_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-frpile_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-14-frpile_mlm_en.md new file mode 100644 index 00000000000000..6f2bcb829de42a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-frpile_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English frpile_mlm BertEmbeddings from DragosGorduza +author: John Snow Labs +name: frpile_mlm +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `frpile_mlm` is an English model originally trained by DragosGorduza. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/frpile_mlm_en_5.1.1_3.0_1694656401826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/frpile_mlm_en_5.1.1_3.0_1694656401826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("frpile_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("frpile_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|frpile_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/DragosGorduza/FRPile_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-gbert_base_finetuned_twitter_janst_en.md b/docs/_posts/ahmedlone127/2023-09-14-gbert_base_finetuned_twitter_janst_en.md new file mode 100644 index 00000000000000..a7d41d1877bdf1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-gbert_base_finetuned_twitter_janst_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gbert_base_finetuned_twitter_janst BertEmbeddings from JanSt +author: John Snow Labs +name: gbert_base_finetuned_twitter_janst +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gbert_base_finetuned_twitter_janst` is an English model originally trained by JanSt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gbert_base_finetuned_twitter_janst_en_5.1.1_3.0_1694666124788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gbert_base_finetuned_twitter_janst_en_5.1.1_3.0_1694666124788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gbert_base_finetuned_twitter_janst","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gbert_base_finetuned_twitter_janst", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gbert_base_finetuned_twitter_janst| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.7 MB| + +## References + +https://huggingface.co/JanSt/gbert-base-finetuned-twitter \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-gbert_large_autopart_en.md b/docs/_posts/ahmedlone127/2023-09-14-gbert_large_autopart_en.md new file mode 100644 index 00000000000000..fe87dd40038a5d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-gbert_large_autopart_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gbert_large_autopart BertEmbeddings from luciore95 +author: John Snow Labs +name: gbert_large_autopart +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gbert_large_autopart` is an English model originally trained by luciore95. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gbert_large_autopart_en_5.1.1_3.0_1694669047387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gbert_large_autopart_en_5.1.1_3.0_1694669047387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gbert_large_autopart","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gbert_large_autopart", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gbert_large_autopart| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/luciore95/gbert-large-autopart \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-gbert_large_finetuned_cust18_en.md b/docs/_posts/ahmedlone127/2023-09-14-gbert_large_finetuned_cust18_en.md new file mode 100644 index 00000000000000..ce2f6db90b1414 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-gbert_large_finetuned_cust18_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gbert_large_finetuned_cust18 BertEmbeddings from shafin +author: John Snow Labs +name: gbert_large_finetuned_cust18 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gbert_large_finetuned_cust18` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gbert_large_finetuned_cust18_en_5.1.1_3.0_1694664219714.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gbert_large_finetuned_cust18_en_5.1.1_3.0_1694664219714.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gbert_large_finetuned_cust18","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gbert_large_finetuned_cust18", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gbert_large_finetuned_cust18| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/shafin/gbert-large-finetuned-cust18 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-gbert_large_finetuned_cust_en.md b/docs/_posts/ahmedlone127/2023-09-14-gbert_large_finetuned_cust_en.md new file mode 100644 index 00000000000000..32310949e01b6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-gbert_large_finetuned_cust_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English gbert_large_finetuned_cust BertEmbeddings from shafin +author: John Snow Labs +name: gbert_large_finetuned_cust +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `gbert_large_finetuned_cust` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gbert_large_finetuned_cust_en_5.1.1_3.0_1694662333720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gbert_large_finetuned_cust_en_5.1.1_3.0_1694662333720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gbert_large_finetuned_cust","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gbert_large_finetuned_cust", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gbert_large_finetuned_cust| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/shafin/gbert-large-finetuned-cust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-gepabert_de.md b/docs/_posts/ahmedlone127/2023-09-14-gepabert_de.md new file mode 100644 index 00000000000000..725f463a85f721 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-gepabert_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German gepabert BertEmbeddings from aehrm +author: John Snow Labs +name: gepabert +date: 2023-09-14 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gepabert` is a German model originally trained by aehrm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gepabert_de_5.1.1_3.0_1694654562569.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gepabert_de_5.1.1_3.0_1694654562569.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gepabert","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gepabert", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gepabert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|1.3 GB| + +## References + +https://huggingface.co/aehrm/gepabert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-google_Job_data_tuned_trial_1_en.md b/docs/_posts/ahmedlone127/2023-09-14-google_Job_data_tuned_trial_1_en.md new file mode 100644 index 00000000000000..054ab1e0a89b4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-google_Job_data_tuned_trial_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English google_Job_data_tuned_trial_1 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: google_Job_data_tuned_trial_1 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `google_Job_data_tuned_trial_1` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/google_Job_data_tuned_trial_1_en_5.1.2_3.0_1694734998102.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/google_Job_data_tuned_trial_1_en_5.1.2_3.0_1694734998102.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("google_Job_data_tuned_trial_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("google_Job_data_tuned_trial_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|google_Job_data_tuned_trial_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/google_Job_data_tuned_trial_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-gujarati_bert_scratch_gu.md b/docs/_posts/ahmedlone127/2023-09-14-gujarati_bert_scratch_gu.md new file mode 100644 index 00000000000000..8a8d4b1e24d396 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-gujarati_bert_scratch_gu.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Gujarati gujarati_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: gujarati_bert_scratch +date: 2023-09-14 +tags: [bert, gu, open_source, fill_mask, onnx] +task: Embeddings +language: gu +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`gujarati_bert_scratch` is a Gujarati model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/gujarati_bert_scratch_gu_5.1.1_3.0_1694652276670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/gujarati_bert_scratch_gu_5.1.1_3.0_1694652276670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("gujarati_bert_scratch","gu") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("gujarati_bert_scratch", "gu") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|gujarati_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|gu| +|Size:|470.4 MB| + +## References + +https://huggingface.co/l3cube-pune/gujarati-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-hf_distilbert_imdb_mlm_cosine_en.md b/docs/_posts/ahmedlone127/2023-09-14-hf_distilbert_imdb_mlm_cosine_en.md new file mode 100644 index 00000000000000..f2a91829c87dac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-hf_distilbert_imdb_mlm_cosine_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm_cosine DistilBertEmbeddings from nos1de +author: John Snow Labs +name: hf_distilbert_imdb_mlm_cosine +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hf_distilbert_imdb_mlm_cosine` is an English model originally trained by nos1de. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_en_5.1.2_3.0_1694734368264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_en_5.1.2_3.0_1694734368264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("hf_distilbert_imdb_mlm_cosine","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("hf_distilbert_imdb_mlm_cosine", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm_cosine| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nos1de/hf-distilbert-imdb-mlm-cosine \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models_en.md b/docs/_posts/ahmedlone127/2023-09-14-incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models_en.md new file mode 100644 index 00000000000000..bee6ac16a86e3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models BertEmbeddings from sophia-jihye +author: John Snow Labs +name: incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models` is an English model originally trained by sophia-jihye. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models_en_5.1.1_3.0_1694667130967.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models_en_5.1.1_3.0_1694667130967.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|incorporation_of_company_related_factual_knowledge_into_pre_trained_language_models| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|406.4 MB| + +## References + +https://huggingface.co/sophia-jihye/Incorporation_of_Company-Related_Factual_Knowledge_into_Pre-trained_Language_Models \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-itd_bert_en.md b/docs/_posts/ahmedlone127/2023-09-14-itd_bert_en.md new file mode 100644 index 00000000000000..f5eccdd79bdf46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-itd_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English itd_bert BertEmbeddings from melll-uff +author: John Snow Labs +name: itd_bert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `itd_bert` is an English model originally trained by melll-uff. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/itd_bert_en_5.1.1_3.0_1694669236393.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/itd_bert_en_5.1.1_3.0_1694669236393.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("itd_bert","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("itd_bert", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|itd_bert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.3 MB| + +## References + +https://huggingface.co/melll-uff/itd_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-itd_longformer_en.md b/docs/_posts/ahmedlone127/2023-09-14-itd_longformer_en.md new file mode 100644 index 00000000000000..8a2a462ff6c2d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-itd_longformer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English itd_longformer BertEmbeddings from melll-uff +author: John Snow Labs +name: itd_longformer +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `itd_longformer` is an English model originally trained by melll-uff. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/itd_longformer_en_5.1.1_3.0_1694669346528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/itd_longformer_en_5.1.1_3.0_1694669346528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("itd_longformer","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("itd_longformer", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|itd_longformer| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|415.9 MB| + +## References + +https://huggingface.co/melll-uff/itd_longformer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-jobs_pretraining_model_en.md b/docs/_posts/ahmedlone127/2023-09-14-jobs_pretraining_model_en.md new file mode 100644 index 00000000000000..75d3724bbd497b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-jobs_pretraining_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English jobs_pretraining_model BertEmbeddings from afif00 +author: John Snow Labs +name: jobs_pretraining_model +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `jobs_pretraining_model` is an English model originally trained by afif00. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/jobs_pretraining_model_en_5.1.1_3.0_1694661632301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/jobs_pretraining_model_en_5.1.1_3.0_1694661632301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("jobs_pretraining_model","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("jobs_pretraining_model", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|jobs_pretraining_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/afif00/jobs-pretraining-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-kannada_bert_scratch_kn.md b/docs/_posts/ahmedlone127/2023-09-14-kannada_bert_scratch_kn.md new file mode 100644 index 00000000000000..489c54b6534d66 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-kannada_bert_scratch_kn.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Kannada kannada_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: kannada_bert_scratch +date: 2023-09-14 +tags: [bert, kn, open_source, fill_mask, onnx] +task: Embeddings +language: kn +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kannada_bert_scratch` is a Kannada model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kannada_bert_scratch_kn_5.1.1_3.0_1694652710651.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kannada_bert_scratch_kn_5.1.1_3.0_1694652710651.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("kannada_bert_scratch","kn") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("kannada_bert_scratch", "kn")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kannada_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|kn| +|Size:|470.6 MB| + +## References + +https://huggingface.co/l3cube-pune/kannada-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-klue_bert_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-14-klue_bert_mlm_en.md new file mode 100644 index 00000000000000..a4ec260a075c0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-klue_bert_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English klue_bert_mlm BertEmbeddings from goodjw +author: John Snow Labs +name: klue_bert_mlm +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `klue_bert_mlm` is an English model originally trained by goodjw. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/klue_bert_mlm_en_5.1.1_3.0_1694651022503.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/klue_bert_mlm_en_5.1.1_3.0_1694651022503.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("klue_bert_mlm","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("klue_bert_mlm", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|klue_bert_mlm| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|412.4 MB| + +## References + +https://huggingface.co/goodjw/klue-bert-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.000006_en.md b/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.000006_en.md new file mode 100644 index 00000000000000..95500a982ee892 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.000006_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_1000_0.000006 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_1000_0.000006 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_1000_0.000006` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_1000_0.000006_en_5.1.1_3.0_1694663885237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_1000_0.000006_en_5.1.1_3.0_1694663885237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("kw_pubmed_1000_0.000006","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("kw_pubmed_1000_0.000006", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_1000_0.000006| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_1000_0.000006 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.00006_en.md b/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.00006_en.md new file mode 100644 index 00000000000000..9467e699fed262 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.00006_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_1000_0.00006 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_1000_0.00006 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_1000_0.00006` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_1000_0.00006_en_5.1.1_3.0_1694663539113.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_1000_0.00006_en_5.1.1_3.0_1694663539113.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("kw_pubmed_1000_0.00006","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("kw_pubmed_1000_0.00006", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_1000_0.00006| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_1000_0.00006 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.0003_en.md b/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.0003_en.md new file mode 100644 index 00000000000000..569429ac101f50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-kw_pubmed_1000_0.0003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kw_pubmed_1000_0.0003 BertEmbeddings from enoriega +author: John Snow Labs +name: kw_pubmed_1000_0.0003 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `kw_pubmed_1000_0.0003` is an English model originally trained by enoriega. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kw_pubmed_1000_0.0003_en_5.1.1_3.0_1694663260280.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kw_pubmed_1000_0.0003_en_5.1.1_3.0_1694663260280.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("kw_pubmed_1000_0.0003","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("kw_pubmed_1000_0.0003", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kw_pubmed_1000_0.0003| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|408.2 MB| + +## References + +https://huggingface.co/enoriega/kw_pubmed_1000_0.0003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-legal_hebert_ft_en.md b/docs/_posts/ahmedlone127/2023-09-14-legal_hebert_ft_en.md new file mode 100644 index 00000000000000..87a87782f0a4e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-legal_hebert_ft_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legal_hebert_ft BertEmbeddings from avichr +author: John Snow Labs +name: legal_hebert_ft +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legal_hebert_ft` is an English model originally trained by avichr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legal_hebert_ft_en_5.1.1_3.0_1694656937535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legal_hebert_ft_en_5.1.1_3.0_1694656937535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("legal_hebert_ft","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("legal_hebert_ft", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legal_hebert_ft| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.9 MB| + +## References + +https://huggingface.co/avichr/Legal-heBERT_ft \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-legalbert_large_1.7m_1_en.md b/docs/_posts/ahmedlone127/2023-09-14-legalbert_large_1.7m_1_en.md new file mode 100644 index 00000000000000..2129067405d6a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-legalbert_large_1.7m_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbert_large_1.7m_1 BertEmbeddings from pile-of-law +author: John Snow Labs +name: legalbert_large_1.7m_1 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbert_large_1.7m_1` is an English model originally trained by pile-of-law. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbert_large_1.7m_1_en_5.1.1_3.0_1694651512853.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbert_large_1.7m_1_en_5.1.1_3.0_1694651512853.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("legalbert_large_1.7m_1","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("legalbert_large_1.7m_1", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbert_large_1.7m_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|643.5 MB| + +## References + +https://huggingface.co/pile-of-law/legalbert-large-1.7M-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-legalbert_large_1.7m_2_en.md b/docs/_posts/ahmedlone127/2023-09-14-legalbert_large_1.7m_2_en.md new file mode 100644 index 00000000000000..512842ee82e8d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-legalbert_large_1.7m_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English legalbert_large_1.7m_2 BertEmbeddings from pile-of-law +author: John Snow Labs +name: legalbert_large_1.7m_2 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `legalbert_large_1.7m_2` is an English model originally trained by pile-of-law. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/legalbert_large_1.7m_2_en_5.1.1_3.0_1694653987323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/legalbert_large_1.7m_2_en_5.1.1_3.0_1694653987323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("legalbert_large_1.7m_2","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("legalbert_large_1.7m_2", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|legalbert_large_1.7m_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|645.3 MB| + +## References + +https://huggingface.co/pile-of-law/legalbert-large-1.7M-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-logion_50k_wordpiece_en.md b/docs/_posts/ahmedlone127/2023-09-14-logion_50k_wordpiece_en.md new file mode 100644 index 00000000000000..18637a4b33e784 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-logion_50k_wordpiece_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English logion_50k_wordpiece BertEmbeddings from cabrooks +author: John Snow Labs +name: logion_50k_wordpiece +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `logion_50k_wordpiece` is an English model originally trained by cabrooks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/logion_50k_wordpiece_en_5.1.1_3.0_1694661683131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/logion_50k_wordpiece_en_5.1.1_3.0_1694661683131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+# data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = BertEmbeddings.pretrained("logion_50k_wordpiece","en") \
+    .setInputCols(["documents","token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+pipelineModel = pipeline.fit(data)
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+// data: DataFrame with a "text" column. Tokenizer produces the "token" column BertEmbeddings requires.
+val document_assembler = new DocumentAssembler()
+  .setInputCol("text")
+  .setOutputCol("documents")
+
+val tokenizer = new Tokenizer()
+  .setInputCols(Array("documents"))
+  .setOutputCol("token")
+
+val embeddings = BertEmbeddings
+  .pretrained("logion_50k_wordpiece", "en")
+  .setInputCols(Array("documents","token"))
+  .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+val pipelineModel = pipeline.fit(data)
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|logion_50k_wordpiece| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.1 MB| + +## References + +https://huggingface.co/cabrooks/LOGION-50k_wordpiece \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-louribert_en.md b/docs/_posts/ahmedlone127/2023-09-14-louribert_en.md new file mode 100644 index 00000000000000..74c2850a318111 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-louribert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English louribert BertEmbeddings from saeid7776 +author: John Snow Labs +name: louribert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `louribert` is an English model originally trained by saeid7776. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/louribert_en_5.1.1_3.0_1694664381526.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/louribert_en_5.1.1_3.0_1694664381526.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("louribert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("louribert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|louribert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|668.1 MB| + +## References + +https://huggingface.co/saeid7776/LouriBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-louribert_more_tokens_saeid7776_en.md b/docs/_posts/ahmedlone127/2023-09-14-louribert_more_tokens_saeid7776_en.md new file mode 100644 index 00000000000000..a0dbf24ce80480 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-louribert_more_tokens_saeid7776_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English louribert_more_tokens_saeid7776 BertEmbeddings from saeid7776 +author: John Snow Labs +name: louribert_more_tokens_saeid7776 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `louribert_more_tokens_saeid7776` is an English model originally trained by saeid7776. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/louribert_more_tokens_saeid7776_en_5.1.1_3.0_1694665209272.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/louribert_more_tokens_saeid7776_en_5.1.1_3.0_1694665209272.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("louribert_more_tokens_saeid7776","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("louribert_more_tokens_saeid7776", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|louribert_more_tokens_saeid7776| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|670.8 MB| + +## References + +https://huggingface.co/saeid7776/LouriBert_more_tokens \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-louribert_more_tokens_zahrabahmani61_en.md b/docs/_posts/ahmedlone127/2023-09-14-louribert_more_tokens_zahrabahmani61_en.md new file mode 100644 index 00000000000000..b267727fc08d49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-louribert_more_tokens_zahrabahmani61_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English louribert_more_tokens_zahrabahmani61 BertEmbeddings from zahrabahmani61 +author: John Snow Labs +name: louribert_more_tokens_zahrabahmani61 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `louribert_more_tokens_zahrabahmani61` is an English model originally trained by zahrabahmani61. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/louribert_more_tokens_zahrabahmani61_en_5.1.1_3.0_1694674505303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/louribert_more_tokens_zahrabahmani61_en_5.1.1_3.0_1694674505303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("louribert_more_tokens_zahrabahmani61","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("louribert_more_tokens_zahrabahmani61", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|louribert_more_tokens_zahrabahmani61| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|668.1 MB| + +## References + +https://huggingface.co/zahrabahmani61/LouriBert_more_tokens \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-lsg16k_Italian_Legal_BERT_it.md b/docs/_posts/ahmedlone127/2023-09-14-lsg16k_Italian_Legal_BERT_it.md new file mode 100644 index 00000000000000..9db6537f150c15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-lsg16k_Italian_Legal_BERT_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian lsg16k_Italian_Legal_BERT BertEmbeddings from dlicari +author: John Snow Labs +name: lsg16k_Italian_Legal_BERT +date: 2023-09-14 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lsg16k_Italian_Legal_BERT` is an Italian model originally trained by dlicari. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg16k_Italian_Legal_BERT_it_5.1.1_3.0_1694693338486.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg16k_Italian_Legal_BERT_it_5.1.1_3.0_1694693338486.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("lsg16k_Italian_Legal_BERT","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("lsg16k_Italian_Legal_BERT", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg16k_Italian_Legal_BERT| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|454.6 MB| + +## References + +https://huggingface.co/dlicari/lsg16k-Italian-Legal-BERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-lumbarradiologyreports_en.md b/docs/_posts/ahmedlone127/2023-09-14-lumbarradiologyreports_en.md new file mode 100644 index 00000000000000..bfd7194db7ac69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-lumbarradiologyreports_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lumbarradiologyreports BertEmbeddings from YK96 +author: John Snow Labs +name: lumbarradiologyreports +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lumbarradiologyreports` is an English model originally trained by YK96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lumbarradiologyreports_en_5.1.1_3.0_1694669457817.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lumbarradiologyreports_en_5.1.1_3.0_1694669457817.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("lumbarradiologyreports","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("lumbarradiologyreports", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lumbarradiologyreports| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/YK96/LumbarRadiologyReports \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-malayalam_bert_scratch_ml.md b/docs/_posts/ahmedlone127/2023-09-14-malayalam_bert_scratch_ml.md new file mode 100644 index 00000000000000..3a39ec6415ec0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-malayalam_bert_scratch_ml.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Malayalam malayalam_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: malayalam_bert_scratch +date: 2023-09-14 +tags: [bert, ml, open_source, fill_mask, onnx] +task: Embeddings +language: ml +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`malayalam_bert_scratch` is a Malayalam model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malayalam_bert_scratch_ml_5.1.1_3.0_1694651911081.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malayalam_bert_scratch_ml_5.1.1_3.0_1694651911081.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("malayalam_bert_scratch","ml") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("malayalam_bert_scratch", "ml") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malayalam_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ml| +|Size:|470.7 MB| + +## References + +https://huggingface.co/l3cube-pune/malayalam-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-marathi_distilbert_mr.md b/docs/_posts/ahmedlone127/2023-09-14-marathi_distilbert_mr.md new file mode 100644 index 00000000000000..5e73fdeb53bf6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-marathi_distilbert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_distilbert DistilBertEmbeddings from DarshanDeshpande +author: John Snow Labs +name: marathi_distilbert +date: 2023-09-14 +tags: [distilbert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`marathi_distilbert` is a Marathi model originally trained by DarshanDeshpande. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_distilbert_mr_5.1.2_3.0_1694735432160.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_distilbert_mr_5.1.2_3.0_1694735432160.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = DistilBertEmbeddings.pretrained("marathi_distilbert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = DistilBertEmbeddings + .pretrained("marathi_distilbert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|247.5 MB| + +## References + +https://huggingface.co/DarshanDeshpande/marathi-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mbert_squad_en.md b/docs/_posts/ahmedlone127/2023-09-14-mbert_squad_en.md new file mode 100644 index 00000000000000..13c840d9731b59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mbert_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mbert_squad BertEmbeddings from oceanpty +author: John Snow Labs +name: mbert_squad +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mbert_squad` is an English model originally trained by oceanpty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mbert_squad_en_5.1.1_3.0_1694652176700.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mbert_squad_en_5.1.1_3.0_1694652176700.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("mbert_squad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("mbert_squad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mbert_squad| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|665.0 MB| + +## References + +https://huggingface.co/oceanpty/mbert-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mdistilbertV3.1_en.md b/docs/_posts/ahmedlone127/2023-09-14-mdistilbertV3.1_en.md new file mode 100644 index 00000000000000..a780a45ca4aaca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mdistilbertV3.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mdistilbertV3.1 DistilBertEmbeddings from bongsoo +author: John Snow Labs +name: mdistilbertV3.1 +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mdistilbertV3.1` is an English model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdistilbertV3.1_en_5.1.2_3.0_1694735345489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdistilbertV3.1_en_5.1.2_3.0_1694735345489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = DistilBertEmbeddings.pretrained("mdistilbertV3.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = DistilBertEmbeddings + .pretrained("mdistilbertV3.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdistilbertV3.1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|621.1 MB| + +## References + +https://huggingface.co/bongsoo/mdistilbertV3.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-medbert_512_norwegian_duplicates_de.md b/docs/_posts/ahmedlone127/2023-09-14-medbert_512_norwegian_duplicates_de.md new file mode 100644 index 00000000000000..4af11bf2aa10bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-medbert_512_norwegian_duplicates_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German medbert_512_norwegian_duplicates BertEmbeddings from GerMedBERT +author: John Snow Labs +name: medbert_512_norwegian_duplicates +date: 2023-09-14 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`medbert_512_norwegian_duplicates` is a German model originally trained by GerMedBERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medbert_512_norwegian_duplicates_de_5.1.1_3.0_1694654562610.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medbert_512_norwegian_duplicates_de_5.1.1_3.0_1694654562610.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("medbert_512_norwegian_duplicates","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("medbert_512_norwegian_duplicates", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medbert_512_norwegian_duplicates| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.8 MB| + +## References + +https://huggingface.co/GerMedBERT/medbert-512-no-duplicates \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-medbit_it.md b/docs/_posts/ahmedlone127/2023-09-14-medbit_it.md new file mode 100644 index 00000000000000..7742c28c348186 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-medbit_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian medbit BertEmbeddings from IVN-RIN +author: John Snow Labs +name: medbit +date: 2023-09-14 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medbit` is an Italian model originally trained by IVN-RIN. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medbit_it_5.1.1_3.0_1694658556151.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medbit_it_5.1.1_3.0_1694658556151.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("medbit","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("medbit", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medbit| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.2 MB| + +## References + +https://huggingface.co/IVN-RIN/medBIT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-medbit_r3_plus_it.md b/docs/_posts/ahmedlone127/2023-09-14-medbit_r3_plus_it.md new file mode 100644 index 00000000000000..21c65a8b1c6da2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-medbit_r3_plus_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian medbit_r3_plus BertEmbeddings from IVN-RIN +author: John Snow Labs +name: medbit_r3_plus +date: 2023-09-14 +tags: [bert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medbit_r3_plus` is an Italian model originally trained by IVN-RIN. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medbit_r3_plus_it_5.1.1_3.0_1694655730518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medbit_r3_plus_it_5.1.1_3.0_1694655730518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") +# Tokenize each document; the embeddings stage reads the "token" column. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") +# Pretrained embeddings stage; consumes the "documents" and "token" columns. +embeddings = BertEmbeddings.pretrained("medbit_r3_plus","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) +# `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") +// Tokenize each document; the embeddings stage reads the "token" column. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") +// Pretrained embeddings stage; consumes the "documents" and "token" columns. +val embeddings = BertEmbeddings + .pretrained("medbit_r3_plus", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) +// `data` must be a DataFrame with a "text" column (the DocumentAssembler input). +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medbit_r3_plus| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|409.2 MB| + +## References + +https://huggingface.co/IVN-RIN/medBIT-r3-plus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-medium_mlm_tweet_en.md b/docs/_posts/ahmedlone127/2023-09-14-medium_mlm_tweet_en.md new file mode 100644 index 00000000000000..29d7db54fcfc96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-medium_mlm_tweet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English medium_mlm_tweet BertEmbeddings from muhtasham +author: John Snow Labs +name: medium_mlm_tweet +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medium_mlm_tweet` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medium_mlm_tweet_en_5.1.1_3.0_1694664476857.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medium_mlm_tweet_en_5.1.1_3.0_1694664476857.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("medium_mlm_tweet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("medium_mlm_tweet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medium_mlm_tweet| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|154.2 MB| + +## References + +https://huggingface.co/muhtasham/medium-mlm-tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-medruberttiny2_ru.md b/docs/_posts/ahmedlone127/2023-09-14-medruberttiny2_ru.md new file mode 100644 index 00000000000000..5aefa2d60c1af0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-medruberttiny2_ru.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Russian medruberttiny2 BertEmbeddings from DmitryPogrebnoy +author: John Snow Labs +name: medruberttiny2 +date: 2023-09-14 +tags: [bert, ru, open_source, fill_mask, onnx] +task: Embeddings +language: ru +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medruberttiny2` is a Russian model originally trained by DmitryPogrebnoy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medruberttiny2_ru_5.1.1_3.0_1694658844568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medruberttiny2_ru_5.1.1_3.0_1694658844568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("medruberttiny2","ru") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("medruberttiny2", "ru") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medruberttiny2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ru| +|Size:|109.1 MB| + +## References + +https://huggingface.co/DmitryPogrebnoy/MedRuBertTiny2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mergedistill_base_cased_anneal_en.md b/docs/_posts/ahmedlone127/2023-09-14-mergedistill_base_cased_anneal_en.md new file mode 100644 index 00000000000000..e7cccffcb1d689 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mergedistill_base_cased_anneal_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mergedistill_base_cased_anneal BertEmbeddings from amitness +author: John Snow Labs +name: mergedistill_base_cased_anneal +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mergedistill_base_cased_anneal` is an English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mergedistill_base_cased_anneal_en_5.1.1_3.0_1694655583062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mergedistill_base_cased_anneal_en_5.1.1_3.0_1694655583062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mergedistill_base_cased_anneal","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mergedistill_base_cased_anneal", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mergedistill_base_cased_anneal| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|538.7 MB| + +## References + +https://huggingface.co/amitness/mergedistill-base-cased-anneal \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mergedistill_base_cased_anneal_v4_en.md b/docs/_posts/ahmedlone127/2023-09-14-mergedistill_base_cased_anneal_v4_en.md new file mode 100644 index 00000000000000..db87564f46d8d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mergedistill_base_cased_anneal_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mergedistill_base_cased_anneal_v4 BertEmbeddings from amitness +author: John Snow Labs +name: mergedistill_base_cased_anneal_v4 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mergedistill_base_cased_anneal_v4` is an English model originally trained by amitness. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mergedistill_base_cased_anneal_v4_en_5.1.1_3.0_1694658171035.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mergedistill_base_cased_anneal_v4_en_5.1.1_3.0_1694658171035.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mergedistill_base_cased_anneal_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mergedistill_base_cased_anneal_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mergedistill_base_cased_anneal_v4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|539.3 MB| + +## References + +https://huggingface.co/amitness/mergedistill-base-cased-anneal-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mini_mlm_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-14-mini_mlm_imdb_en.md new file mode 100644 index 00000000000000..08aa73b0b2ed25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mini_mlm_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mini_mlm_imdb BertEmbeddings from muhtasham +author: John Snow Labs +name: mini_mlm_imdb +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mini_mlm_imdb` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mini_mlm_imdb_en_5.1.1_3.0_1694664315374.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mini_mlm_imdb_en_5.1.1_3.0_1694664315374.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mini_mlm_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mini_mlm_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mini_mlm_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/muhtasham/mini-mlm-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mini_mlm_tweet_en.md b/docs/_posts/ahmedlone127/2023-09-14-mini_mlm_tweet_en.md new file mode 100644 index 00000000000000..6d36dd181e8794 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mini_mlm_tweet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mini_mlm_tweet BertEmbeddings from muhtasham +author: John Snow Labs +name: mini_mlm_tweet +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mini_mlm_tweet` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mini_mlm_tweet_en_5.1.1_3.0_1694663987603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mini_mlm_tweet_en_5.1.1_3.0_1694663987603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mini_mlm_tweet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mini_mlm_tweet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mini_mlm_tweet| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|41.8 MB| + +## References + +https://huggingface.co/muhtasham/mini-mlm-tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mlm_20230416_003_1_en.md b/docs/_posts/ahmedlone127/2023-09-14-mlm_20230416_003_1_en.md new file mode 100644 index 00000000000000..564cc07291fc77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mlm_20230416_003_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230416_003_1 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230416_003_1 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230416_003_1` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230416_003_1_en_5.1.1_3.0_1694657044354.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230416_003_1_en_5.1.1_3.0_1694657044354.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230416_003_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230416_003_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230416_003_1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230416-003-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mlm_20230416_003_2_en.md b/docs/_posts/ahmedlone127/2023-09-14-mlm_20230416_003_2_en.md new file mode 100644 index 00000000000000..32daa3fdf5ecd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mlm_20230416_003_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_20230416_003_2 BertEmbeddings from intanm +author: John Snow Labs +name: mlm_20230416_003_2 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_20230416_003_2` is an English model originally trained by intanm. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_20230416_003_2_en_5.1.1_3.0_1694658343177.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_20230416_003_2_en_5.1.1_3.0_1694658343177.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlm_20230416_003_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlm_20230416_003_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_20230416_003_2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|464.2 MB| + +## References + +https://huggingface.co/intanm/mlm-20230416-003-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mlm_model_en.md b/docs/_posts/ahmedlone127/2023-09-14-mlm_model_en.md new file mode 100644 index 00000000000000..342d9ff186049d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mlm_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_model DistilBertEmbeddings from amkorba +author: John Snow Labs +name: mlm_model +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_model` is an English model originally trained by amkorba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_model_en_5.1.2_3.0_1694735450221.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_model_en_5.1.2_3.0_1694735450221.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("mlm_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("mlm_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/amkorba/mlm-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mlperf_inference_bert_pytorch_fp32_squad_v1.1_en.md b/docs/_posts/ahmedlone127/2023-09-14-mlperf_inference_bert_pytorch_fp32_squad_v1.1_en.md new file mode 100644 index 00000000000000..5220b152bfa159 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mlperf_inference_bert_pytorch_fp32_squad_v1.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlperf_inference_bert_pytorch_fp32_squad_v1.1 BertEmbeddings from cknowledge +author: John Snow Labs +name: mlperf_inference_bert_pytorch_fp32_squad_v1.1 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlperf_inference_bert_pytorch_fp32_squad_v1.1` is an English model originally trained by cknowledge. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlperf_inference_bert_pytorch_fp32_squad_v1.1_en_5.1.1_3.0_1694660332701.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlperf_inference_bert_pytorch_fp32_squad_v1.1_en_5.1.1_3.0_1694660332701.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mlperf_inference_bert_pytorch_fp32_squad_v1.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mlperf_inference_bert_pytorch_fp32_squad_v1.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlperf_inference_bert_pytorch_fp32_squad_v1.1| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|1.3 GB| + +## References + +https://huggingface.co/cknowledge/mlperf-inference-bert-pytorch-fp32-squad-v1.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-model_imdb_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-14-model_imdb_finetuned_en.md new file mode 100644 index 00000000000000..fb86215e144fba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-model_imdb_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_imdb_finetuned BertEmbeddings from phanidhar +author: John Snow Labs +name: model_imdb_finetuned +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_imdb_finetuned` is an English model originally trained by phanidhar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_imdb_finetuned_en_5.1.1_3.0_1694662202940.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_imdb_finetuned_en_5.1.1_3.0_1694662202940.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model_imdb_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_imdb_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_imdb_finetuned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/phanidhar/model-imdb-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-model_saeid7776_en.md b/docs/_posts/ahmedlone127/2023-09-14-model_saeid7776_en.md new file mode 100644 index 00000000000000..686979f1e283ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-model_saeid7776_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_saeid7776 BertEmbeddings from saeid7776 +author: John Snow Labs +name: model_saeid7776 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_saeid7776` is an English model originally trained by saeid7776. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_saeid7776_en_5.1.1_3.0_1694665486872.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_saeid7776_en_5.1.1_3.0_1694665486872.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model_saeid7776","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_saeid7776", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_saeid7776| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|672.3 MB| + +## References + +https://huggingface.co/saeid7776/model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-model_v02_en.md b/docs/_posts/ahmedlone127/2023-09-14-model_v02_en.md new file mode 100644 index 00000000000000..c54d628bbc45d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-model_v02_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English model_v02 BertEmbeddings from saeid7776 +author: John Snow Labs +name: model_v02 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `model_v02` is an English model originally trained by saeid7776. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/model_v02_en_5.1.1_3.0_1694665653448.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/model_v02_en_5.1.1_3.0_1694665653448.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("model_v02","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("model_v02", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|model_v02| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|672.8 MB| + +## References + +https://huggingface.co/saeid7776/model_v02 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-multilingual_bert_model_classiffication_xx.md b/docs/_posts/ahmedlone127/2023-09-14-multilingual_bert_model_classiffication_xx.md new file mode 100644 index 00000000000000..2bf7dd4ebb7d34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-multilingual_bert_model_classiffication_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual multilingual_bert_model_classiffication BertEmbeddings from billfass +author: John Snow Labs +name: multilingual_bert_model_classiffication +date: 2023-09-14 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `multilingual_bert_model_classiffication` is a Multilingual model originally trained by billfass. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_bert_model_classiffication_xx_5.1.1_3.0_1694675880530.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_bert_model_classiffication_xx_5.1.1_3.0_1694675880530.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("multilingual_bert_model_classiffication","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("multilingual_bert_model_classiffication", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_bert_model_classiffication| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|663.2 MB| + +## References + +https://huggingface.co/billfass/multilingual_bert_model_classiffication \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-muril_base_cased_en.md b/docs/_posts/ahmedlone127/2023-09-14-muril_base_cased_en.md new file mode 100644 index 00000000000000..4a9eb0904bf4fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-muril_base_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English muril_base_cased BertEmbeddings from google +author: John Snow Labs +name: muril_base_cased +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `muril_base_cased` is an English model originally trained by google. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/muril_base_cased_en_5.1.1_3.0_1694651581808.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/muril_base_cased_en_5.1.1_3.0_1694651581808.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("muril_base_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("muril_base_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|muril_base_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|890.4 MB| + +## References + +https://huggingface.co/google/muril-base-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mvr_squad_bert_base_multilingual_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-14-mvr_squad_bert_base_multilingual_cased_xx.md new file mode 100644 index 00000000000000..6f71bf2d7109eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mvr_squad_bert_base_multilingual_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual mvr_squad_bert_base_multilingual_cased BertEmbeddings from dyyyyyyyy +author: John Snow Labs +name: mvr_squad_bert_base_multilingual_cased +date: 2023-09-14 +tags: [bert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mvr_squad_bert_base_multilingual_cased` is a Multilingual model originally trained by dyyyyyyyy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mvr_squad_bert_base_multilingual_cased_xx_5.1.1_3.0_1694658258601.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mvr_squad_bert_base_multilingual_cased_xx_5.1.1_3.0_1694658258601.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mvr_squad_bert_base_multilingual_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mvr_squad_bert_base_multilingual_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mvr_squad_bert_base_multilingual_cased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|665.0 MB| + +## References + +https://huggingface.co/dyyyyyyyy/MVR_squad_BERT-base-multilingual-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-mymodel_en.md b/docs/_posts/ahmedlone127/2023-09-14-mymodel_en.md new file mode 100644 index 00000000000000..d47053633850ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-mymodel_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mymodel BertEmbeddings from heima +author: John Snow Labs +name: mymodel +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mymodel` is an English model originally trained by heima. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mymodel_en_5.1.1_3.0_1694655891652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mymodel_en_5.1.1_3.0_1694655891652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("mymodel","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("mymodel", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mymodel| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/heima/mymodel \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-news_contrastive_pretrain_en.md b/docs/_posts/ahmedlone127/2023-09-14-news_contrastive_pretrain_en.md new file mode 100644 index 00000000000000..6490b428eea3e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-news_contrastive_pretrain_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English news_contrastive_pretrain BertEmbeddings from yyu +author: John Snow Labs +name: news_contrastive_pretrain +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `news_contrastive_pretrain` is an English model originally trained by yyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/news_contrastive_pretrain_en_5.1.1_3.0_1694675368213.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/news_contrastive_pretrain_en_5.1.1_3.0_1694675368213.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("news_contrastive_pretrain","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("news_contrastive_pretrain", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|news_contrastive_pretrain| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/yyu/news_contrastive_pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-oyo_bert_base_yo.md b/docs/_posts/ahmedlone127/2023-09-14-oyo_bert_base_yo.md new file mode 100644 index 00000000000000..a318f643294551 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-oyo_bert_base_yo.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Yoruba oyo_bert_base BertEmbeddings from Davlan +author: John Snow Labs +name: oyo_bert_base +date: 2023-09-14 +tags: [bert, yo, open_source, fill_mask, onnx] +task: Embeddings +language: yo +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `oyo_bert_base` is a Yoruba model originally trained by Davlan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/oyo_bert_base_yo_5.1.1_3.0_1694663927219.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/oyo_bert_base_yo_5.1.1_3.0_1694663927219.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("oyo_bert_base","yo") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("oyo_bert_base", "yo") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|oyo_bert_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|yo| +|Size:|412.5 MB| + +## References + +https://huggingface.co/Davlan/oyo-bert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-parlbert_german_law_de.md b/docs/_posts/ahmedlone127/2023-09-14-parlbert_german_law_de.md new file mode 100644 index 00000000000000..f50623989eecdf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-parlbert_german_law_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German parlbert_german_law BertEmbeddings from InfAI +author: John Snow Labs +name: parlbert_german_law +date: 2023-09-14 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `parlbert_german_law` is a German model originally trained by InfAI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/parlbert_german_law_de_5.1.1_3.0_1694667648647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/parlbert_german_law_de_5.1.1_3.0_1694667648647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("parlbert_german_law","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("parlbert_german_law", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|parlbert_german_law| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.8 MB| + +## References + +https://huggingface.co/InfAI/parlbert-german-law \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-project3_model_en.md b/docs/_posts/ahmedlone127/2023-09-14-project3_model_en.md new file mode 100644 index 00000000000000..3b96943457b19b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-project3_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English project3_model BertEmbeddings from nithya +author: John Snow Labs +name: project3_model +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `project3_model` is an English model originally trained by nithya. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/project3_model_en_5.1.1_3.0_1694662572357.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/project3_model_en_5.1.1_3.0_1694662572357.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("project3_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("project3_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|project3_model| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/nithya/project3-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-public_models_en.md b/docs/_posts/ahmedlone127/2023-09-14-public_models_en.md new file mode 100644 index 00000000000000..0c01d10eb05cd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-public_models_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English public_models BertEmbeddings from helloNet +author: John Snow Labs +name: public_models +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `public_models` is an English model originally trained by helloNet. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/public_models_en_5.1.1_3.0_1694656362220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/public_models_en_5.1.1_3.0_1694656362220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("public_models","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("public_models", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|public_models| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/helloNet/public_models \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-radbert_en.md b/docs/_posts/ahmedlone127/2023-09-14-radbert_en.md new file mode 100644 index 00000000000000..48d8e85eaec6d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-radbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English radbert BertEmbeddings from StanfordAIMI +author: John Snow Labs +name: radbert +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `radbert` is an English model originally trained by StanfordAIMI. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/radbert_en_5.1.1_3.0_1694656544468.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/radbert_en_5.1.1_3.0_1694656544468.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("radbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("radbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|radbert| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.5 MB| + +## References + +https://huggingface.co/StanfordAIMI/RadBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-remote_sensing_distilbert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-14-remote_sensing_distilbert_cased_en.md new file mode 100644 index 00000000000000..08457f46c61478 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-remote_sensing_distilbert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English remote_sensing_distilbert_cased DistilBertEmbeddings from Chramer +author: John Snow Labs +name: remote_sensing_distilbert_cased +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `remote_sensing_distilbert_cased` is an English model originally trained by Chramer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/remote_sensing_distilbert_cased_en_5.1.2_3.0_1694735724243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/remote_sensing_distilbert_cased_en_5.1.2_3.0_1694735724243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("remote_sensing_distilbert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("remote_sensing_distilbert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|remote_sensing_distilbert_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/Chramer/remote-sensing-distilbert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-retromae_msmarco_distill_en.md b/docs/_posts/ahmedlone127/2023-09-14-retromae_msmarco_distill_en.md new file mode 100644 index 00000000000000..c6bb0e888a5a06 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-retromae_msmarco_distill_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English retromae_msmarco_distill BertEmbeddings from Shitao +author: John Snow Labs +name: retromae_msmarco_distill +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `retromae_msmarco_distill` is an English model originally trained by Shitao. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/retromae_msmarco_distill_en_5.1.1_3.0_1694650755532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/retromae_msmarco_distill_en_5.1.1_3.0_1694650755532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("retromae_msmarco_distill","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("retromae_msmarco_distill", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|retromae_msmarco_distill| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.7 MB| + +## References + +https://huggingface.co/Shitao/RetroMAE_MSMARCO_distill \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-review_contrastive_pretrain_en.md b/docs/_posts/ahmedlone127/2023-09-14-review_contrastive_pretrain_en.md new file mode 100644 index 00000000000000..71b5eb863ebba0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-review_contrastive_pretrain_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English review_contrastive_pretrain BertEmbeddings from yyu +author: John Snow Labs +name: review_contrastive_pretrain +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `review_contrastive_pretrain` is an English model originally trained by yyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/review_contrastive_pretrain_en_5.1.1_3.0_1694676140447.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/review_contrastive_pretrain_en_5.1.1_3.0_1694676140447.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("review_contrastive_pretrain","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("review_contrastive_pretrain", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|review_contrastive_pretrain| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.3 MB| + +## References + +https://huggingface.co/yyu/review_contrastive_pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-sagorbert_nwp_finetuning_test2_en.md b/docs/_posts/ahmedlone127/2023-09-14-sagorbert_nwp_finetuning_test2_en.md new file mode 100644 index 00000000000000..8445a53a13997a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-sagorbert_nwp_finetuning_test2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sagorbert_nwp_finetuning_test2 BertEmbeddings from amirhamza11 +author: John Snow Labs +name: sagorbert_nwp_finetuning_test2 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sagorbert_nwp_finetuning_test2` is an English model originally trained by amirhamza11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sagorbert_nwp_finetuning_test2_en_5.1.1_3.0_1694659149656.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sagorbert_nwp_finetuning_test2_en_5.1.1_3.0_1694659149656.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sagorbert_nwp_finetuning_test2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sagorbert_nwp_finetuning_test2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sagorbert_nwp_finetuning_test2| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|614.8 MB| + +## References + +https://huggingface.co/amirhamza11/sagorbert_nwp_finetuning_test2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-sagorbert_nwp_finetuning_test4_en.md b/docs/_posts/ahmedlone127/2023-09-14-sagorbert_nwp_finetuning_test4_en.md new file mode 100644 index 00000000000000..1a7e72dbe5b021 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-sagorbert_nwp_finetuning_test4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sagorbert_nwp_finetuning_test4 BertEmbeddings from amirhamza11 +author: John Snow Labs +name: sagorbert_nwp_finetuning_test4 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sagorbert_nwp_finetuning_test4` is an English model originally trained by amirhamza11. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sagorbert_nwp_finetuning_test4_en_5.1.1_3.0_1694662734694.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sagorbert_nwp_finetuning_test4_en_5.1.1_3.0_1694662734694.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("sagorbert_nwp_finetuning_test4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("sagorbert_nwp_finetuning_test4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sagorbert_nwp_finetuning_test4| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|614.9 MB| + +## References + +https://huggingface.co/amirhamza11/sagorbert_nwp_finetuning_test4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-scholarbert_100_64bit_en.md b/docs/_posts/ahmedlone127/2023-09-14-scholarbert_100_64bit_en.md new file mode 100644 index 00000000000000..f8902d9a65e07a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-scholarbert_100_64bit_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English scholarbert_100_64bit BertEmbeddings from globuslabs +author: John Snow Labs +name: scholarbert_100_64bit +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `scholarbert_100_64bit` is an English model originally trained by globuslabs. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/scholarbert_100_64bit_en_5.1.1_3.0_1694668681788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/scholarbert_100_64bit_en_5.1.1_3.0_1694668681788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("scholarbert_100_64bit","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("scholarbert_100_64bit", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|scholarbert_100_64bit| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|842.4 MB| + +## References + +https://huggingface.co/globuslabs/ScholarBERT_100_64bit \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-skc_mlm_german_torch_de.md b/docs/_posts/ahmedlone127/2023-09-14-skc_mlm_german_torch_de.md new file mode 100644 index 00000000000000..ffb89c33cc68f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-skc_mlm_german_torch_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German skc_mlm_german_torch BertEmbeddings from Tobias +author: John Snow Labs +name: skc_mlm_german_torch +date: 2023-09-14 +tags: [bert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`skc_mlm_german_torch` is a German model originally trained by Tobias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/skc_mlm_german_torch_de_5.1.1_3.0_1694663119516.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/skc_mlm_german_torch_de_5.1.1_3.0_1694663119516.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("skc_mlm_german_torch","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("skc_mlm_german_torch", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|skc_mlm_german_torch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|406.9 MB| + +## References + +https://huggingface.co/Tobias/skc_MLM_German_torch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-small_mlm_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-14-small_mlm_imdb_en.md new file mode 100644 index 00000000000000..08a988389d73b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-small_mlm_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_imdb BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_imdb +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_imdb` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_imdb_en_5.1.1_3.0_1694664931729.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_imdb_en_5.1.1_3.0_1694664931729.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|107.0 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-small_mlm_tweet_en.md b/docs/_posts/ahmedlone127/2023-09-14-small_mlm_tweet_en.md new file mode 100644 index 00000000000000..e354adee2a69c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-small_mlm_tweet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English small_mlm_tweet BertEmbeddings from muhtasham +author: John Snow Labs +name: small_mlm_tweet +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `small_mlm_tweet` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/small_mlm_tweet_en_5.1.1_3.0_1694664219703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/small_mlm_tweet_en_5.1.1_3.0_1694664219703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("small_mlm_tweet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("small_mlm_tweet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|small_mlm_tweet| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|106.9 MB| + +## References + +https://huggingface.co/muhtasham/small-mlm-tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-splade_cocondenser_ensembledistil_en.md b/docs/_posts/ahmedlone127/2023-09-14-splade_cocondenser_ensembledistil_en.md new file mode 100644 index 00000000000000..0f067aeb8edb23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-splade_cocondenser_ensembledistil_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_cocondenser_ensembledistil BertEmbeddings from naver +author: John Snow Labs +name: splade_cocondenser_ensembledistil +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_cocondenser_ensembledistil` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_cocondenser_ensembledistil_en_5.1.1_3.0_1694661773914.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_cocondenser_ensembledistil_en_5.1.1_3.0_1694661773914.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_cocondenser_ensembledistil","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_cocondenser_ensembledistil", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_cocondenser_ensembledistil| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/naver/splade-cocondenser-ensembledistil \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-splade_cocondenser_selfdistil_naver_en.md b/docs/_posts/ahmedlone127/2023-09-14-splade_cocondenser_selfdistil_naver_en.md new file mode 100644 index 00000000000000..837a77557b8ae5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-splade_cocondenser_selfdistil_naver_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_cocondenser_selfdistil_naver BertEmbeddings from naver +author: John Snow Labs +name: splade_cocondenser_selfdistil_naver +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_cocondenser_selfdistil_naver` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_cocondenser_selfdistil_naver_en_5.1.1_3.0_1694661508394.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_cocondenser_selfdistil_naver_en_5.1.1_3.0_1694661508394.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("splade_cocondenser_selfdistil_naver","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("splade_cocondenser_selfdistil_naver", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_cocondenser_selfdistil_naver| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/naver/splade-cocondenser-selfdistil \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-telugu_bert_scratch_te.md b/docs/_posts/ahmedlone127/2023-09-14-telugu_bert_scratch_te.md new file mode 100644 index 00000000000000..4aa47d4b618289 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-telugu_bert_scratch_te.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Telugu telugu_bert_scratch BertEmbeddings from l3cube-pune +author: John Snow Labs +name: telugu_bert_scratch +date: 2023-09-14 +tags: [bert, te, open_source, fill_mask, onnx] +task: Embeddings +language: te +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`telugu_bert_scratch` is a Telugu model originally trained by l3cube-pune. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/telugu_bert_scratch_te_5.1.1_3.0_1694651570626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/telugu_bert_scratch_te_5.1.1_3.0_1694651570626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("telugu_bert_scratch","te") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("telugu_bert_scratch", "te") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|telugu_bert_scratch| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|te| +|Size:|470.5 MB| + +## References + +https://huggingface.co/l3cube-pune/telugu-bert-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-test_bert_base_spanish_wwm_cased_finetuned_ultrasounds_en.md b/docs/_posts/ahmedlone127/2023-09-14-test_bert_base_spanish_wwm_cased_finetuned_ultrasounds_en.md new file mode 100644 index 00000000000000..a8a9587cb1dd00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-test_bert_base_spanish_wwm_cased_finetuned_ultrasounds_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_bert_base_spanish_wwm_cased_finetuned_ultrasounds BertEmbeddings from manucos +author: John Snow Labs +name: test_bert_base_spanish_wwm_cased_finetuned_ultrasounds +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_bert_base_spanish_wwm_cased_finetuned_ultrasounds` is an English model originally trained by manucos. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_bert_base_spanish_wwm_cased_finetuned_ultrasounds_en_5.1.1_3.0_1694663393018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_bert_base_spanish_wwm_cased_finetuned_ultrasounds_en_5.1.1_3.0_1694663393018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_bert_base_spanish_wwm_cased_finetuned_ultrasounds","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_bert_base_spanish_wwm_cased_finetuned_ultrasounds", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_bert_base_spanish_wwm_cased_finetuned_ultrasounds| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/manucos/test-bert-base-spanish-wwm-cased-finetuned-ultrasounds \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-test_bert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-14-test_bert_base_uncased_en.md new file mode 100644 index 00000000000000..ca35916c7f1021 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-test_bert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_bert_base_uncased BertEmbeddings from kkkzzzkkk +author: John Snow Labs +name: test_bert_base_uncased +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_bert_base_uncased` is an English model originally trained by kkkzzzkkk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_bert_base_uncased_en_5.1.1_3.0_1694663392938.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_bert_base_uncased_en_5.1.1_3.0_1694663392938.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_bert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_bert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_bert_base_uncased| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/kkkzzzkkk/test_bert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-test_dushen_en.md b/docs/_posts/ahmedlone127/2023-09-14-test_dushen_en.md new file mode 100644 index 00000000000000..80ab3cd98bf81c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-test_dushen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_dushen BertEmbeddings from dushen +author: John Snow Labs +name: test_dushen +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_dushen` is an English model originally trained by dushen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_dushen_en_5.1.1_3.0_1694653639422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_dushen_en_5.1.1_3.0_1694653639422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("test_dushen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("test_dushen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_dushen| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.2 MB| + +## References + +https://huggingface.co/dushen/test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-tiny_mlm_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-14-tiny_mlm_imdb_en.md new file mode 100644 index 00000000000000..a95b493f28606b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-tiny_mlm_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_imdb BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_imdb +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_imdb` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_imdb_en_5.1.1_3.0_1694663665545.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_imdb_en_5.1.1_3.0_1694663665545.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_imdb| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-tiny_mlm_tweet_en.md b/docs/_posts/ahmedlone127/2023-09-14-tiny_mlm_tweet_en.md new file mode 100644 index 00000000000000..7ab71d2936ee20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-tiny_mlm_tweet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_mlm_tweet BertEmbeddings from muhtasham +author: John Snow Labs +name: tiny_mlm_tweet +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_mlm_tweet` is an English model originally trained by muhtasham. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_mlm_tweet_en_5.1.1_3.0_1694663791725.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_mlm_tweet_en_5.1.1_3.0_1694663791725.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("tiny_mlm_tweet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("tiny_mlm_tweet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_mlm_tweet| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|16.7 MB| + +## References + +https://huggingface.co/muhtasham/tiny-mlm-tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-train_mask_language_model_en.md b/docs/_posts/ahmedlone127/2023-09-14-train_mask_language_model_en.md new file mode 100644 index 00000000000000..fefc807be955e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-train_mask_language_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English train_mask_language_model DistilBertEmbeddings from hubert10 +author: John Snow Labs +name: train_mask_language_model +date: 2023-09-14 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `train_mask_language_model` is an English model originally trained by hubert10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/train_mask_language_model_en_5.1.2_3.0_1694735909215.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/train_mask_language_model_en_5.1.2_3.0_1694735909215.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("train_mask_language_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("train_mask_language_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|train_mask_language_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hubert10/train_mask_language_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-ucb_bert_finetunned_en.md b/docs/_posts/ahmedlone127/2023-09-14-ucb_bert_finetunned_en.md new file mode 100644 index 00000000000000..2ab6c71bbc765b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-ucb_bert_finetunned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ucb_bert_finetunned BertEmbeddings from Diegomejia +author: John Snow Labs +name: ucb_bert_finetunned +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ucb_bert_finetunned` is an English model originally trained by Diegomejia. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ucb_bert_finetunned_en_5.1.1_3.0_1694661140033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ucb_bert_finetunned_en_5.1.1_3.0_1694661140033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("ucb_bert_finetunned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("ucb_bert_finetunned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ucb_bert_finetunned| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|409.5 MB| + +## References + +https://huggingface.co/Diegomejia/ucb-bert-finetunned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-vatestnew_en.md b/docs/_posts/ahmedlone127/2023-09-14-vatestnew_en.md new file mode 100644 index 00000000000000..6eaaf25ddb1f10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-vatestnew_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English vatestnew BertEmbeddings from mtluczek80 +author: John Snow Labs +name: vatestnew +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `vatestnew` is an English model originally trained by mtluczek80. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vatestnew_en_5.1.1_3.0_1694657391978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vatestnew_en_5.1.1_3.0_1694657391978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("vatestnew","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("vatestnew", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vatestnew| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|403.6 MB| + +## References + +https://huggingface.co/mtluczek80/VATestNew \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-vbert_2021_base_en.md b/docs/_posts/ahmedlone127/2023-09-14-vbert_2021_base_en.md new file mode 100644 index 00000000000000..9f8d9584a3a562 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-vbert_2021_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English vbert_2021_base BertEmbeddings from VMware +author: John Snow Labs +name: vbert_2021_base +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `vbert_2021_base` is an English model originally trained by VMware. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/vbert_2021_base_en_5.1.1_3.0_1694664971607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/vbert_2021_base_en_5.1.1_3.0_1694664971607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("vbert_2021_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("vbert_2021_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|vbert_2021_base| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.1 MB| + +## References + +https://huggingface.co/VMware/vbert-2021-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-weights_bert_mlm_epoch50_en.md b/docs/_posts/ahmedlone127/2023-09-14-weights_bert_mlm_epoch50_en.md new file mode 100644 index 00000000000000..d3405b6f4c8b2f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-weights_bert_mlm_epoch50_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English weights_bert_mlm_epoch50 BertEmbeddings from grumpy +author: John Snow Labs +name: weights_bert_mlm_epoch50 +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `weights_bert_mlm_epoch50` is an English model originally trained by grumpy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/weights_bert_mlm_epoch50_en_5.1.1_3.0_1694652096710.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/weights_bert_mlm_epoch50_en_5.1.1_3.0_1694652096710.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("weights_bert_mlm_epoch50","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("weights_bert_mlm_epoch50", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|weights_bert_mlm_epoch50| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|405.9 MB| + +## References + +https://huggingface.co/grumpy/weights_bert_mlm_epoch50 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-wiki_contrastive_pretrain_en.md b/docs/_posts/ahmedlone127/2023-09-14-wiki_contrastive_pretrain_en.md new file mode 100644 index 00000000000000..f5c798e105a71a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-wiki_contrastive_pretrain_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English wiki_contrastive_pretrain BertEmbeddings from yyu +author: John Snow Labs +name: wiki_contrastive_pretrain +date: 2023-09-14 +tags: [bert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `wiki_contrastive_pretrain` is an English model originally trained by yyu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/wiki_contrastive_pretrain_en_5.1.1_3.0_1694676032643.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/wiki_contrastive_pretrain_en_5.1.1_3.0_1694676032643.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("wiki_contrastive_pretrain","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("wiki_contrastive_pretrain", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|wiki_contrastive_pretrain| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|407.4 MB| + +## References + +https://huggingface.co/yyu/wiki_contrastive_pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-14-word_ethical_ko.md b/docs/_posts/ahmedlone127/2023-09-14-word_ethical_ko.md new file mode 100644 index 00000000000000..04b4d7f98c066d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-14-word_ethical_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean word_ethical BertEmbeddings from julian5383 +author: John Snow Labs +name: word_ethical +date: 2023-09-14 +tags: [bert, ko, open_source, fill_mask, onnx] +task: Embeddings +language: ko +edition: Spark NLP 5.1.1 +spark_version: 3.0 +supported: true +engine: onnx +annotator: BertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained BertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `word_ethical` is a Korean model originally trained by julian5383. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/word_ethical_ko_5.1.1_3.0_1694670073884.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/word_ethical_ko_5.1.1_3.0_1694670073884.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = BertEmbeddings.pretrained("word_ethical","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = BertEmbeddings + .pretrained("word_ethical", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|word_ethical| +|Compatibility:|Spark NLP 5.1.1+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ko| +|Size:|421.2 MB| + +## References + +https://huggingface.co/julian5383/word_ethical \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-100_sdb_tbb_en.md b/docs/_posts/ahmedlone127/2023-09-15-100_sdb_tbb_en.md new file mode 100644 index 00000000000000..eea668d4b339e8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-100_sdb_tbb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 100_sdb_tbb DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 100_sdb_tbb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `100_sdb_tbb` is an English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/100_sdb_tbb_en_5.1.2_3.0_1694784123220.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/100_sdb_tbb_en_5.1.2_3.0_1694784123220.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("100_sdb_tbb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("100_sdb_tbb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|100_sdb_tbb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/sripadhstudy/100_SDB_TBB \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version1_en.md b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version1_en.md new file mode 100644 index 00000000000000..fea219717006e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 20split_dataset_version1 DistilBertEmbeddings from Billwzl +author: John Snow Labs +name: 20split_dataset_version1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `20split_dataset_version1` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20split_dataset_version1_en_5.1.2_3.0_1694780611703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20split_dataset_version1_en_5.1.2_3.0_1694780611703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("20split_dataset_version1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("20split_dataset_version1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20split_dataset_version1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Billwzl/20split_dataset_version1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version2_en.md b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version2_en.md new file mode 100644 index 00000000000000..5a5ae58f9c6d35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 20split_dataset_version2 DistilBertEmbeddings from Billwzl +author: John Snow Labs +name: 20split_dataset_version2 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `20split_dataset_version2` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20split_dataset_version2_en_5.1.2_3.0_1694781304116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20split_dataset_version2_en_5.1.2_3.0_1694781304116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("20split_dataset_version2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("20split_dataset_version2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20split_dataset_version2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.0 MB| + +## References + +https://huggingface.co/Billwzl/20split_dataset_version2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version3_en.md b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version3_en.md new file mode 100644 index 00000000000000..1676b2b1aa852d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 20split_dataset_version3 DistilBertEmbeddings from Billwzl +author: John Snow Labs +name: 20split_dataset_version3 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `20split_dataset_version3` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20split_dataset_version3_en_5.1.2_3.0_1694781556318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20split_dataset_version3_en_5.1.2_3.0_1694781556318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("20split_dataset_version3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("20split_dataset_version3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20split_dataset_version3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Billwzl/20split_dataset_version3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version4_en.md b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version4_en.md new file mode 100644 index 00000000000000..4114cdc080b445 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-20split_dataset_version4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 20split_dataset_version4 DistilBertEmbeddings from Billwzl +author: John Snow Labs +name: 20split_dataset_version4 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `20split_dataset_version4` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/20split_dataset_version4_en_5.1.2_3.0_1694782055167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/20split_dataset_version4_en_5.1.2_3.0_1694782055167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("20split_dataset_version4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("20split_dataset_version4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|20split_dataset_version4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Billwzl/20split_dataset_version4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-500_sdb_tbb_en.md b/docs/_posts/ahmedlone127/2023-09-15-500_sdb_tbb_en.md new file mode 100644 index 00000000000000..43e9f72176801e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-500_sdb_tbb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English 500_sdb_tbb DistilBertEmbeddings from sripadhstudy +author: John Snow Labs +name: 500_sdb_tbb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `500_sdb_tbb` is an English model originally trained by sripadhstudy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/500_sdb_tbb_en_5.1.2_3.0_1694784390417.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/500_sdb_tbb_en_5.1.2_3.0_1694784390417.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("500_sdb_tbb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("500_sdb_tbb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|500_sdb_tbb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/sripadhstudy/500_SDB_TBB \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-aave_distil_bert_en.md b/docs/_posts/ahmedlone127/2023-09-15-aave_distil_bert_en.md new file mode 100644 index 00000000000000..0c6b3141a32a29 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-aave_distil_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English aave_distil_bert DistilBertEmbeddings from csalaam +author: John Snow Labs +name: aave_distil_bert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `aave_distil_bert` is an English model originally trained by csalaam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/aave_distil_bert_en_5.1.2_3.0_1694781245405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/aave_distil_bert_en_5.1.2_3.0_1694781245405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("aave_distil_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("aave_distil_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|aave_distil_bert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/csalaam/AAVE-distil-bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-absa_with_maskedlm_finetuned_sentihood_en.md b/docs/_posts/ahmedlone127/2023-09-15-absa_with_maskedlm_finetuned_sentihood_en.md new file mode 100644 index 00000000000000..94283894826b54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-absa_with_maskedlm_finetuned_sentihood_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English absa_with_maskedlm_finetuned_sentihood DistilBertEmbeddings from UchihaMadara +author: John Snow Labs +name: absa_with_maskedlm_finetuned_sentihood +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `absa_with_maskedlm_finetuned_sentihood` is an English model originally trained by UchihaMadara. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/absa_with_maskedlm_finetuned_sentihood_en_5.1.2_3.0_1694786043453.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/absa_with_maskedlm_finetuned_sentihood_en_5.1.2_3.0_1694786043453.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("absa_with_maskedlm_finetuned_sentihood","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("absa_with_maskedlm_finetuned_sentihood", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|absa_with_maskedlm_finetuned_sentihood| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/UchihaMadara/ABSA-with-MaskedLM-finetuned-sentihood \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-ad_distilbert_base_uncased_finetuned_dantest2_en.md b/docs/_posts/ahmedlone127/2023-09-15-ad_distilbert_base_uncased_finetuned_dantest2_en.md new file mode 100644 index 00000000000000..2ac718547ae413 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-ad_distilbert_base_uncased_finetuned_dantest2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ad_distilbert_base_uncased_finetuned_dantest2 DistilBertEmbeddings from refringence +author: John Snow Labs +name: ad_distilbert_base_uncased_finetuned_dantest2 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ad_distilbert_base_uncased_finetuned_dantest2` is an English model originally trained by refringence.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ad_distilbert_base_uncased_finetuned_dantest2_en_5.1.2_3.0_1694777623781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ad_distilbert_base_uncased_finetuned_dantest2_en_5.1.2_3.0_1694777623781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("ad_distilbert_base_uncased_finetuned_dantest2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("ad_distilbert_base_uncased_finetuned_dantest2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ad_distilbert_base_uncased_finetuned_dantest2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/refringence/ad-distilbert-base-uncased-finetuned-dantest2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_base_uncased_finetuned_imdb_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_base_uncased_finetuned_imdb_accelerate_en.md new file mode 100644 index 00000000000000..8b2b3e14125407 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_base_uncased_finetuned_imdb_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_base_uncased_finetuned_imdb_accelerate DistilBertEmbeddings from Arthuerwang +author: John Snow Labs +name: bert_base_uncased_finetuned_imdb_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_base_uncased_finetuned_imdb_accelerate` is an English model originally trained by Arthuerwang.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_accelerate_en_5.1.2_3.0_1694783561288.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_base_uncased_finetuned_imdb_accelerate_en_5.1.2_3.0_1694783561288.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_base_uncased_finetuned_imdb_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_base_uncased_finetuned_imdb_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_base_uncased_finetuned_imdb_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Arthuerwang/bert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_distil_ct_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_distil_ct_en.md new file mode 100644 index 00000000000000..59445a5eb45611 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_distil_ct_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_distil_ct DistilBertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: bert_distil_ct +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_distil_ct` is an English model originally trained by Contrastive-Tension. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_distil_ct_en_5.1.2_3.0_1694770957596.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_distil_ct_en_5.1.2_3.0_1694770957596.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_distil_ct","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_distil_ct", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_distil_ct| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Distil-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_distil_nli_ct_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_distil_nli_ct_en.md new file mode 100644 index 00000000000000..263aedc5f9ebe9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_distil_nli_ct_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_distil_nli_ct DistilBertEmbeddings from Contrastive-Tension +author: John Snow Labs +name: bert_distil_nli_ct +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_distil_nli_ct` is an English model originally trained by Contrastive-Tension. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_distil_nli_ct_en_5.1.2_3.0_1694771080239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_distil_nli_ct_en_5.1.2_3.0_1694771080239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_distil_nli_ct","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_distil_nli_ct", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_distil_nli_ct| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Contrastive-Tension/BERT-Distil-NLI-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_name_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_name_en.md new file mode 100644 index 00000000000000..adb4957f0414db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_name_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_name DistilBertEmbeddings from Ondiet +author: John Snow Labs +name: bert_name +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_name` is an English model originally trained by Ondiet. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_name_en_5.1.2_3.0_1694788522767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_name_en_5.1.2_3.0_1694788522767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_name","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_name", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_name| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ondiet/bert_name \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_news_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_news_en.md new file mode 100644 index 00000000000000..b8f3700c4de564 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_news_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_news DistilBertEmbeddings from harvinder676 +author: John Snow Labs +name: bert_news +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_news` is an English model originally trained by harvinder676. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_news_en_5.1.2_3.0_1694782914172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_news_en_5.1.2_3.0_1694782914172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_news","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_news", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_news| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/harvinder676/bert-news \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_pretrain_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_pretrain_en.md new file mode 100644 index 00000000000000..34886cf1e9c64d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_pretrain_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_pretrain DistilBertEmbeddings from catofnull +author: John Snow Labs +name: bert_pretrain +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_pretrain` is an English model originally trained by catofnull. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_pretrain_en_5.1.2_3.0_1694775655895.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_pretrain_en_5.1.2_3.0_1694775655895.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_pretrain","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_pretrain", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_pretrain| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/catofnull/BERT-Pretrain \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bert_tuned_trial_20_12_2022_en.md b/docs/_posts/ahmedlone127/2023-09-15-bert_tuned_trial_20_12_2022_en.md new file mode 100644 index 00000000000000..dc68fbe89125c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bert_tuned_trial_20_12_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bert_tuned_trial_20_12_2022 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: bert_tuned_trial_20_12_2022 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bert_tuned_trial_20_12_2022` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bert_tuned_trial_20_12_2022_en_5.1.2_3.0_1694784215749.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bert_tuned_trial_20_12_2022_en_5.1.2_3.0_1694784215749.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bert_tuned_trial_20_12_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bert_tuned_trial_20_12_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bert_tuned_trial_20_12_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|447.5 MB| + +## References + +https://huggingface.co/EslamAhmed/BERT_tuned_trial_20-12-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bertfined_finetunedmodel_fakenews_en.md b/docs/_posts/ahmedlone127/2023-09-15-bertfined_finetunedmodel_fakenews_en.md new file mode 100644 index 00000000000000..e11b830920c741 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bertfined_finetunedmodel_fakenews_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertfined_finetunedmodel_fakenews DistilBertEmbeddings from Sakil +author: John Snow Labs +name: bertfined_finetunedmodel_fakenews +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertfined_finetunedmodel_fakenews` is an English model originally trained by Sakil. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertfined_finetunedmodel_fakenews_en_5.1.2_3.0_1694790199446.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertfined_finetunedmodel_fakenews_en_5.1.2_3.0_1694790199446.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bertfined_finetunedmodel_fakenews","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bertfined_finetunedmodel_fakenews", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertfined_finetunedmodel_fakenews| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/Sakil/bertfined_finetunedmodel_fakenews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bertino_it.md b/docs/_posts/ahmedlone127/2023-09-15-bertino_it.md new file mode 100644 index 00000000000000..154883d0285a3d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bertino_it.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Italian bertino DistilBertEmbeddings from indigo-ai +author: John Snow Labs +name: bertino +date: 2023-09-15 +tags: [distilbert, it, open_source, fill_mask, onnx] +task: Embeddings +language: it +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertino` is an Italian model originally trained by indigo-ai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertino_it_5.1.2_3.0_1694780792709.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertino_it_5.1.2_3.0_1694780792709.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bertino","it") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bertino", "it") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertino| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|it| +|Size:|253.0 MB| + +## References + +https://huggingface.co/indigo-ai/BERTino \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-bertino_lsg_en.md b/docs/_posts/ahmedlone127/2023-09-15-bertino_lsg_en.md new file mode 100644 index 00000000000000..30ab6840313ef8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-bertino_lsg_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English bertino_lsg DistilBertEmbeddings from efederici +author: John Snow Labs +name: bertino_lsg +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bertino_lsg` is an English model originally trained by efederici. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/bertino_lsg_en_5.1.2_3.0_1694772729108.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/bertino_lsg_en_5.1.2_3.0_1694772729108.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("bertino_lsg","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("bertino_lsg", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|bertino_lsg| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|263.3 MB| + +## References + +https://huggingface.co/efederici/bertino-lsg \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-burmese_finetuned_distilbert_portuguese_en.md b/docs/_posts/ahmedlone127/2023-09-15-burmese_finetuned_distilbert_portuguese_en.md new file mode 100644 index 00000000000000..2f8110527e6642 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-burmese_finetuned_distilbert_portuguese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English burmese_finetuned_distilbert_portuguese DistilBertEmbeddings from jibi2906 +author: John Snow Labs +name: burmese_finetuned_distilbert_portuguese +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `burmese_finetuned_distilbert_portuguese` is an English model originally trained by jibi2906. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/burmese_finetuned_distilbert_portuguese_en_5.1.2_3.0_1694782684337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/burmese_finetuned_distilbert_portuguese_en_5.1.2_3.0_1694782684337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("burmese_finetuned_distilbert_portuguese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("burmese_finetuned_distilbert_portuguese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|burmese_finetuned_distilbert_portuguese| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jibi2906/my-finetuned-distilbert-pt \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-carrot_tuned_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-carrot_tuned_model_en.md new file mode 100644 index 00000000000000..287a83bbc35942 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-carrot_tuned_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English carrot_tuned_model DistilBertEmbeddings from DaniloH +author: John Snow Labs +name: carrot_tuned_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `carrot_tuned_model` is an English model originally trained by DaniloH. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/carrot_tuned_model_en_5.1.2_3.0_1694781032170.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/carrot_tuned_model_en_5.1.2_3.0_1694781032170.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("carrot_tuned_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("carrot_tuned_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|carrot_tuned_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/DaniloH/carrot-tuned-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-clinical_bert_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-15-clinical_bert_finetuned_en.md new file mode 100644 index 00000000000000..f26d4c79c8962b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-clinical_bert_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinical_bert_finetuned DistilBertEmbeddings from aminghias +author: John Snow Labs +name: clinical_bert_finetuned +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical_bert_finetuned` is an English model originally trained by aminghias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinical_bert_finetuned_en_5.1.2_3.0_1694784623028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinical_bert_finetuned_en_5.1.2_3.0_1694784623028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("clinical_bert_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("clinical_bert_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinical_bert_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.3 MB| + +## References + +https://huggingface.co/aminghias/Clinical-BERT-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-clinicalbert_medicalai_en.md b/docs/_posts/ahmedlone127/2023-09-15-clinicalbert_medicalai_en.md new file mode 100644 index 00000000000000..aa008cdd4f9abb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-clinicalbert_medicalai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clinicalbert_medicalai DistilBertEmbeddings from medicalai +author: John Snow Labs +name: clinicalbert_medicalai +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinicalbert_medicalai` is an English model originally trained by medicalai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clinicalbert_medicalai_en_5.1.2_3.0_1694775464907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clinicalbert_medicalai_en_5.1.2_3.0_1694775464907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("clinicalbert_medicalai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("clinicalbert_medicalai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clinicalbert_medicalai| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.3 MB| + +## References + +https://huggingface.co/medicalai/ClinicalBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-clr_pretrained_distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-clr_pretrained_distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..33d06d45924af0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-clr_pretrained_distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English clr_pretrained_distilbert_base_uncased DistilBertEmbeddings from SauravMaheshkar +author: John Snow Labs +name: clr_pretrained_distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clr_pretrained_distilbert_base_uncased` is an English model originally trained by SauravMaheshkar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/clr_pretrained_distilbert_base_uncased_en_5.1.2_3.0_1694779351433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/clr_pretrained_distilbert_base_uncased_en_5.1.2_3.0_1694779351433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("clr_pretrained_distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("clr_pretrained_distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|clr_pretrained_distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SauravMaheshkar/clr-pretrained-distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-crypto_bert_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-crypto_bert_model_en.md new file mode 100644 index 00000000000000..ab9953467ca37a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-crypto_bert_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English crypto_bert_model DistilBertEmbeddings from vedantgoswami +author: John Snow Labs +name: crypto_bert_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `crypto_bert_model` is an English model originally trained by vedantgoswami. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/crypto_bert_model_en_5.1.2_3.0_1694787260868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/crypto_bert_model_en_5.1.2_3.0_1694787260868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("crypto_bert_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("crypto_bert_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|crypto_bert_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/vedantgoswami/crypto-bert-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-customer_data_tuned_trial_1_en.md b/docs/_posts/ahmedlone127/2023-09-15-customer_data_tuned_trial_1_en.md new file mode 100644 index 00000000000000..de65fef610203f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-customer_data_tuned_trial_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English customer_data_tuned_trial_1 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: customer_data_tuned_trial_1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `customer_data_tuned_trial_1` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/customer_data_tuned_trial_1_en_5.1.2_3.0_1694770508030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/customer_data_tuned_trial_1_en_5.1.2_3.0_1694770508030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("customer_data_tuned_trial_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("customer_data_tuned_trial_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|customer_data_tuned_trial_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/customer_data_tuned_trial_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-dbert_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-15-dbert_finetuned_en.md new file mode 100644 index 00000000000000..f6124834e65b4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-dbert_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dbert_finetuned DistilBertEmbeddings from ksabeh +author: John Snow Labs +name: dbert_finetuned +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbert_finetuned` is an English model originally trained by ksabeh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_finetuned_en_5.1.2_3.0_1694772576304.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_finetuned_en_5.1.2_3.0_1694772576304.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("dbert_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("dbert_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ksabeh/dbert-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-dbert_finetuned_g_en.md b/docs/_posts/ahmedlone127/2023-09-15-dbert_finetuned_g_en.md new file mode 100644 index 00000000000000..248dd699f61a0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-dbert_finetuned_g_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dbert_finetuned_g DistilBertEmbeddings from e-hossam96 +author: John Snow Labs +name: dbert_finetuned_g +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbert_finetuned_g` is an English model originally trained by e-hossam96. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_finetuned_g_en_5.1.2_3.0_1694775531375.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_finetuned_g_en_5.1.2_3.0_1694775531375.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("dbert_finetuned_g","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("dbert_finetuned_g", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert_finetuned_g| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/e-hossam96/dbert-finetuned-g \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-dbert_rda_en.md b/docs/_posts/ahmedlone127/2023-09-15-dbert_rda_en.md new file mode 100644 index 00000000000000..2a5b3183080fd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-dbert_rda_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dbert_rda DistilBertEmbeddings from nkul +author: John Snow Labs +name: dbert_rda +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dbert_rda` is an English model originally trained by nkul. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dbert_rda_en_5.1.2_3.0_1694783312876.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dbert_rda_en_5.1.2_3.0_1694783312876.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("dbert_rda","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("dbert_rda", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dbert_rda| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nkul/dbert-rda \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-debiasing_pre_trained_contextualised_embeddings_distil_bert_en.md b/docs/_posts/ahmedlone127/2023-09-15-debiasing_pre_trained_contextualised_embeddings_distil_bert_en.md new file mode 100644 index 00000000000000..6073c0a320d86f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-debiasing_pre_trained_contextualised_embeddings_distil_bert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English debiasing_pre_trained_contextualised_embeddings_distil_bert DistilBertEmbeddings from Daniel-Saeedi +author: John Snow Labs +name: debiasing_pre_trained_contextualised_embeddings_distil_bert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `debiasing_pre_trained_contextualised_embeddings_distil_bert` is an English model originally trained by Daniel-Saeedi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/debiasing_pre_trained_contextualised_embeddings_distil_bert_en_5.1.2_3.0_1694770008501.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/debiasing_pre_trained_contextualised_embeddings_distil_bert_en_5.1.2_3.0_1694770008501.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("debiasing_pre_trained_contextualised_embeddings_distil_bert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("debiasing_pre_trained_contextualised_embeddings_distil_bert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|debiasing_pre_trained_contextualised_embeddings_distil_bert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Daniel-Saeedi/debiasing_pre-trained_contextualised_embeddings_distil_bert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distil_bert_aave_large_en.md b/docs/_posts/ahmedlone127/2023-09-15-distil_bert_aave_large_en.md new file mode 100644 index 00000000000000..fc36f7295e21c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distil_bert_aave_large_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distil_bert_aave_large DistilBertEmbeddings from csalaam +author: John Snow Labs +name: distil_bert_aave_large +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distil_bert_aave_large` is an English model originally trained by csalaam. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_bert_aave_large_en_5.1.2_3.0_1694783341558.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_bert_aave_large_en_5.1.2_3.0_1694783341558.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distil_bert_aave_large","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distil_bert_aave_large", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_bert_aave_large| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/csalaam/distil-bert-aave-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distil_eng_en.md b/docs/_posts/ahmedlone127/2023-09-15-distil_eng_en.md new file mode 100644 index 00000000000000..81b436f220bfff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distil_eng_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distil_eng DistilBertEmbeddings from mboth +author: John Snow Labs +name: distil_eng +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distil_eng` is an English model originally trained by mboth. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distil_eng_en_5.1.2_3.0_1694782258463.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distil_eng_en_5.1.2_3.0_1694782258463.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distil_eng","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distil_eng", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distil_eng| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.5 MB| + +## References + +https://huggingface.co/mboth/distil-eng \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_akuapem_twi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_akuapem_twi_cased_en.md new file mode 100644 index 00000000000000..12589472a53379 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_akuapem_twi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilabena_base_akuapem_twi_cased DistilBertEmbeddings from Ghana-NLP +author: John Snow Labs +name: distilabena_base_akuapem_twi_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilabena_base_akuapem_twi_cased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilabena_base_akuapem_twi_cased_en_5.1.2_3.0_1694777845278.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilabena_base_akuapem_twi_cased_en_5.1.2_3.0_1694777845278.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilabena_base_akuapem_twi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilabena_base_akuapem_twi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilabena_base_akuapem_twi_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.3 MB| + +## References + +https://huggingface.co/Ghana-NLP/distilabena-base-akuapem-twi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_asante_twi_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_asante_twi_uncased_en.md new file mode 100644 index 00000000000000..21f83b38462445 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_asante_twi_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilabena_base_asante_twi_uncased DistilBertEmbeddings from Ghana-NLP +author: John Snow Labs +name: distilabena_base_asante_twi_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilabena_base_asante_twi_uncased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilabena_base_asante_twi_uncased_en_5.1.2_3.0_1694778027436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilabena_base_asante_twi_uncased_en_5.1.2_3.0_1694778027436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilabena_base_asante_twi_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilabena_base_asante_twi_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilabena_base_asante_twi_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.2 MB| + +## References + +https://huggingface.co/Ghana-NLP/distilabena-base-asante-twi-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_v2_akuapem_twi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_v2_akuapem_twi_cased_en.md new file mode 100644 index 00000000000000..e4af1362a76f4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_v2_akuapem_twi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilabena_base_v2_akuapem_twi_cased DistilBertEmbeddings from Ghana-NLP +author: John Snow Labs +name: distilabena_base_v2_akuapem_twi_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilabena_base_v2_akuapem_twi_cased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilabena_base_v2_akuapem_twi_cased_en_5.1.2_3.0_1694778202672.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilabena_base_v2_akuapem_twi_cased_en_5.1.2_3.0_1694778202672.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilabena_base_v2_akuapem_twi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilabena_base_v2_akuapem_twi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilabena_base_v2_akuapem_twi_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.0 MB| + +## References + +https://huggingface.co/Ghana-NLP/distilabena-base-v2-akuapem-twi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_v2_asante_twi_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_v2_asante_twi_uncased_en.md new file mode 100644 index 00000000000000..6f00bae234db9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilabena_base_v2_asante_twi_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilabena_base_v2_asante_twi_uncased DistilBertEmbeddings from Ghana-NLP +author: John Snow Labs +name: distilabena_base_v2_asante_twi_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilabena_base_v2_asante_twi_uncased` is an English model originally trained by Ghana-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilabena_base_v2_asante_twi_uncased_en_5.1.2_3.0_1694778363788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilabena_base_v2_asante_twi_uncased_en_5.1.2_3.0_1694778363788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilabena_base_v2_asante_twi_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilabena_base_v2_asante_twi_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilabena_base_v2_asante_twi_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.1 MB| + +## References + +https://huggingface.co/Ghana-NLP/distilabena-base-v2-asante-twi-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_add_pre_training_complete_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_add_pre_training_complete_en.md new file mode 100644 index 00000000000000..4df7e0415d31e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_add_pre_training_complete_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_add_pre_training_complete DistilBertEmbeddings from gokuls +author: John Snow Labs +name: distilbert_add_pre_training_complete +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_add_pre_training_complete` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_add_pre_training_complete_en_5.1.2_3.0_1694788672681.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_add_pre_training_complete_en_5.1.2_3.0_1694788672681.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_add_pre_training_complete","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_add_pre_training_complete", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_add_pre_training_complete| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gokuls/distilbert_add_pre-training-complete \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_add_pre_training_dim_96_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_add_pre_training_dim_96_en.md new file mode 100644 index 00000000000000..57c958c3c0f54c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_add_pre_training_dim_96_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_add_pre_training_dim_96 DistilBertEmbeddings from gokuls +author: John Snow Labs +name: distilbert_add_pre_training_dim_96 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_add_pre_training_dim_96` is an English model originally trained by gokuls. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_add_pre_training_dim_96_en_5.1.2_3.0_1694789132049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_add_pre_training_dim_96_en_5.1.2_3.0_1694789132049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_add_pre_training_dim_96","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_add_pre_training_dim_96", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_add_pre_training_dim_96| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|25.5 MB| + +## References + +https://huggingface.co/gokuls/distilbert_add_pre-training-dim-96 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_25lang_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_25lang_cased_xx.md new file mode 100644 index 00000000000000..c65abeb6e688bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_25lang_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual distilbert_base_25lang_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_25lang_cased +date: 2023-09-15 +tags: [distilbert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_25lang_cased` is a Multilingual model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_25lang_cased_xx_5.1.2_3.0_1694771346236.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_25lang_cased_xx_5.1.2_3.0_1694771346236.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_25lang_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_25lang_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_25lang_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|405.6 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-25lang-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_cased_fine_tuned_blbooksgenre_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_cased_fine_tuned_blbooksgenre_en.md new file mode 100644 index 00000000000000..8f237f066a1f42 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_cased_fine_tuned_blbooksgenre_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_cased_fine_tuned_blbooksgenre DistilBertEmbeddings from BritishLibraryLabs +author: John Snow Labs +name: distilbert_base_cased_fine_tuned_blbooksgenre +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_cased_fine_tuned_blbooksgenre` is an English model originally trained by BritishLibraryLabs. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_fine_tuned_blbooksgenre_en_5.1.2_3.0_1694777850511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_cased_fine_tuned_blbooksgenre_en_5.1.2_3.0_1694777850511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_cased_fine_tuned_blbooksgenre","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_cased_fine_tuned_blbooksgenre", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_cased_fine_tuned_blbooksgenre| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/BritishLibraryLabs/distilbert-base-cased-fine-tuned-blbooksgenre \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_en_ar_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_en_ar_cased_en.md new file mode 100644 index 00000000000000..6a41bd16c4a3ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_en_ar_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_en_ar_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_en_ar_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_en_ar_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_en_ar_cased_en_5.1.2_3.0_1694736147424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_en_ar_cased_en_5.1.2_3.0_1694736147424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_en_ar_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_en_ar_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_en_ar_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|252.8 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_en_bg_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_en_bg_cased_en.md new file mode 100644 index 00000000000000..0cebd8e9fc7bfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_en_bg_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_en_bg_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_en_bg_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_en_bg_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_en_bg_cased_en_5.1.2_3.0_1694736259560.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_en_bg_cased_en_5.1.2_3.0_1694736259560.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_en_bg_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_en_bg_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_en_bg_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|260.5 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-bg-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_arabic_cased_en.md new file mode 100644 index 00000000000000..d8b6aa20b2e8cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_arabic_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_arabic_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_arabic_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_arabic_cased_en_5.1.2_3.0_1694771816351.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_arabic_cased_en_5.1.2_3.0_1694771816351.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_arabic_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|252.8 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_bulgarian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_bulgarian_cased_en.md new file mode 100644 index 00000000000000..5f54413e5771a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_bulgarian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_bulgarian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_bulgarian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_bulgarian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_bulgarian_cased_en_5.1.2_3.0_1694771936312.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_bulgarian_cased_en_5.1.2_3.0_1694771936312.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_bulgarian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_bulgarian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_bulgarian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|260.5 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-bg-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_chinese_cased_en.md new file mode 100644 index 00000000000000..a2db36c483b540 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_chinese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_chinese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_chinese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_chinese_cased_en_5.1.2_3.0_1694776185915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_chinese_cased_en_5.1.2_3.0_1694776185915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_chinese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|262.2 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_chinese_hindi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_chinese_hindi_cased_en.md new file mode 100644 index 00000000000000..415ac319d7a072 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_chinese_hindi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_chinese_hindi_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_chinese_hindi_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_chinese_hindi_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_chinese_hindi_cased_en_5.1.2_3.0_1694776303232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_chinese_hindi_cased_en_5.1.2_3.0_1694776303232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_chinese_hindi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_chinese_hindi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_chinese_hindi_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|266.8 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-zh-hi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_danish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_danish_cased_en.md new file mode 100644 index 00000000000000..2d9e1196e841eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_danish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_danish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_danish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_danish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_danish_cased_en_5.1.2_3.0_1694772113636.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_danish_cased_en_5.1.2_3.0_1694772113636.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_danish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_danish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_danish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|255.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-da-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_dutch_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_dutch_cased_en.md new file mode 100644 index 00000000000000..61cb1497036289 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_dutch_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_dutch_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_dutch_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_dutch_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_dutch_cased_en_5.1.2_3.0_1694774832781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_dutch_cased_en_5.1.2_3.0_1694774832781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_dutch_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_dutch_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_dutch_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|256.4 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-nl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_arabic_cased_en.md new file mode 100644 index 00000000000000..8f09ebf3492900 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_arabic_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_arabic_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_arabic_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_arabic_cased_en_5.1.2_3.0_1694772831618.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_arabic_cased_en_5.1.2_3.0_1694772831618.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_arabic_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|266.4 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_cased_en.md new file mode 100644 index 00000000000000..f14dada027cdb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_cased_en_5.1.2_3.0_1694772943858.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_cased_en_5.1.2_3.0_1694772943858.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|257.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_chinese_cased_en.md new file mode 100644 index 00000000000000..36d70dc96125db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_chinese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_chinese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_chinese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_chinese_cased_en_5.1.2_3.0_1694774157815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_chinese_cased_en_5.1.2_3.0_1694774157815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_chinese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|275.7 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_chinese_japanese_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_chinese_japanese_vietnamese_cased_en.md new file mode 100644 index 00000000000000..0677b778b8aeb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_chinese_japanese_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_chinese_japanese_vietnamese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_chinese_japanese_vietnamese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_chinese_japanese_vietnamese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_chinese_japanese_vietnamese_cased_en_5.1.2_3.0_1694774268638.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_chinese_japanese_vietnamese_cased_en_5.1.2_3.0_1694774268638.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_chinese_japanese_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_chinese_japanese_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_chinese_japanese_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|287.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-zh-ja-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_danish_japanese_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_danish_japanese_vietnamese_cased_en.md new file mode 100644 index 00000000000000..26ea43a59387ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_danish_japanese_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_danish_japanese_vietnamese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_danish_japanese_vietnamese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_danish_japanese_vietnamese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_danish_japanese_vietnamese_cased_en_5.1.2_3.0_1694773081995.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_danish_japanese_vietnamese_cased_en_5.1.2_3.0_1694773081995.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_danish_japanese_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_danish_japanese_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_danish_japanese_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|287.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-da-ja-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_dutch_russian_arabic_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_dutch_russian_arabic_cased_en.md new file mode 100644 index 00000000000000..723c68a70aafef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_dutch_russian_arabic_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_dutch_russian_arabic_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_dutch_russian_arabic_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_dutch_russian_arabic_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_dutch_russian_arabic_cased_en_5.1.2_3.0_1694774052258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_dutch_russian_arabic_cased_en_5.1.2_3.0_1694774052258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_dutch_russian_arabic_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_dutch_russian_arabic_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_dutch_russian_arabic_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|301.9 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-nl-ru-ar-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_german_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_german_cased_en.md new file mode 100644 index 00000000000000..9b3994bfcaa4ea --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_german_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_german_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_german_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_german_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_german_cased_en_5.1.2_3.0_1694773218512.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_german_cased_en_5.1.2_3.0_1694773218512.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_german_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_german_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_german_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|273.1 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-de-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_german_norwegian_danish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_german_norwegian_danish_cased_en.md new file mode 100644 index 00000000000000..6f0459344ad57f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_german_norwegian_danish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_german_norwegian_danish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_german_norwegian_danish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_german_norwegian_danish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_german_norwegian_danish_cased_en_5.1.2_3.0_1694773335862.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_german_norwegian_danish_cased_en_5.1.2_3.0_1694773335862.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_german_norwegian_danish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_german_norwegian_danish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_german_norwegian_danish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|280.9 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-de-no-da-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_italian_cased_en.md new file mode 100644 index 00000000000000..97d9954b4c2ee2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_italian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_italian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_italian_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_italian_cased_en_5.1.2_3.0_1694773810141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_italian_cased_en_5.1.2_3.0_1694773810141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_italian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|268.6 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_lithuanian_norwegian_polish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_lithuanian_norwegian_polish_cased_en.md new file mode 100644 index 00000000000000..bd6dc3c0e5832e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_lithuanian_norwegian_polish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_lithuanian_norwegian_polish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_lithuanian_norwegian_polish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_lithuanian_norwegian_polish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_lithuanian_norwegian_polish_cased_en_5.1.2_3.0_1694773937924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_lithuanian_norwegian_polish_cased_en_5.1.2_3.0_1694773937924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_lithuanian_norwegian_polish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_lithuanian_norwegian_polish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_lithuanian_norwegian_polish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|282.3 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-lt-no-pl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_cased_en.md new file mode 100644 index 00000000000000..d3d77074005313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_spanish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_spanish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_spanish_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_spanish_cased_en_5.1.2_3.0_1694773457609.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_spanish_cased_en_5.1.2_3.0_1694773457609.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_spanish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_spanish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_spanish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|273.5 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-es-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_german_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_german_chinese_cased_en.md new file mode 100644 index 00000000000000..4c8ac82883c4a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_german_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_french_spanish_german_chinese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_spanish_german_chinese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_spanish_german_chinese_cased` is an English model originally trained by Geotrend.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_spanish_german_chinese_cased_en_5.1.2_3.0_1694773578439.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_spanish_german_chinese_cased_en_5.1.2_3.0_1694773578439.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_spanish_german_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_spanish_german_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_spanish_german_chinese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|307.1 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-es-de-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_portuguese_italian_cased_xx.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_portuguese_italian_cased_xx.md new file mode 100644 index 00000000000000..fb3d04ae622e55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_french_spanish_portuguese_italian_cased_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual distilbert_base_english_french_spanish_portuguese_italian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_french_spanish_portuguese_italian_cased +date: 2023-09-15 +tags: [distilbert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_french_spanish_portuguese_italian_cased` is a Multilingual model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_spanish_portuguese_italian_cased_xx_5.1.2_3.0_1694773689639.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_french_spanish_portuguese_italian_cased_xx_5.1.2_3.0_1694773689639.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_french_spanish_portuguese_italian_cased","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_french_spanish_portuguese_italian_cased", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_french_spanish_portuguese_italian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|285.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_german_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_german_cased_en.md new file mode 100644 index 00000000000000..f41b333cf9a22a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_german_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_german_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_german_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_german_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_german_cased_en_5.1.2_3.0_1694772216088.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_german_cased_en_5.1.2_3.0_1694772216088.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_german_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_german_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_german_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|262.2 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-de-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_hindi_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_hindi_cased_en.md new file mode 100644 index 00000000000000..e7019a696e680a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_hindi_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_hindi_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_hindi_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_hindi_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_hindi_cased_en_5.1.2_3.0_1694774382188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_hindi_cased_en_5.1.2_3.0_1694774382188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_hindi_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_hindi_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_hindi_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-hi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_italian_cased_en.md new file mode 100644 index 00000000000000..586f8d76357949 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_italian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_italian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_italian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_italian_cased_en_5.1.2_3.0_1694774487693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_italian_cased_en_5.1.2_3.0_1694774487693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_italian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.3 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_japanese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_japanese_cased_en.md new file mode 100644 index 00000000000000..606d482826af62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_japanese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_japanese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_japanese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_japanese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_japanese_cased_en_5.1.2_3.0_1694774608716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_japanese_cased_en_5.1.2_3.0_1694774608716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_japanese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_japanese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_japanese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|256.7 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-ja-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_lithuanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_lithuanian_cased_en.md new file mode 100644 index 00000000000000..e40c9a0ef0a05d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_lithuanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_lithuanian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_lithuanian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_lithuanian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_lithuanian_cased_en_5.1.2_3.0_1694774717030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_lithuanian_cased_en_5.1.2_3.0_1694774717030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_lithuanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_lithuanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_lithuanian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|252.4 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-lt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_norwegian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_norwegian_cased_en.md new file mode 100644 index 00000000000000..fc156ef81deef1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_norwegian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_norwegian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_norwegian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_norwegian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_norwegian_cased_en_5.1.2_3.0_1694774950666.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_norwegian_cased_en_5.1.2_3.0_1694774950666.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_norwegian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_norwegian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_norwegian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|256.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-no-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_polish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_polish_cased_en.md new file mode 100644 index 00000000000000..1c6551a361cb40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_polish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_polish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_polish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_polish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_polish_cased_en_5.1.2_3.0_1694775064040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_polish_cased_en_5.1.2_3.0_1694775064040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_polish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_polish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_polish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|258.3 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-pl-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_portuguese_cased_en.md new file mode 100644 index 00000000000000..66c1afe09eb274 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_portuguese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_portuguese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_portuguese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_portuguese_cased_en_5.1.2_3.0_1694775172152.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_portuguese_cased_en_5.1.2_3.0_1694775172152.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_portuguese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|259.6 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-pt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_romanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_romanian_cased_en.md new file mode 100644 index 00000000000000..16175fa13ef393 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_romanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_romanian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_romanian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_romanian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_romanian_cased_en_5.1.2_3.0_1694775272352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_romanian_cased_en_5.1.2_3.0_1694775272352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_romanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_romanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_romanian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|253.7 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-ro-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_russian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_russian_cased_en.md new file mode 100644 index 00000000000000..7e6a1f9547a0c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_russian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_russian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_russian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_russian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_russian_cased_en_5.1.2_3.0_1694775407575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_russian_cased_en_5.1.2_3.0_1694775407575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_russian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_russian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_russian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|268.7 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-ru-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_cased_en.md new file mode 100644 index 00000000000000..ac4013218ebf90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_spanish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_spanish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_spanish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_cased_en_5.1.2_3.0_1694772349323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_cased_en_5.1.2_3.0_1694772349323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_spanish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_spanish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_spanish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|262.6 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-es-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_chinese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_chinese_cased_en.md new file mode 100644 index 00000000000000..de8fa75ccc422c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_chinese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_spanish_chinese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_spanish_chinese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_spanish_chinese_cased` is an English model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_chinese_cased_en_5.1.2_3.0_1694772709372.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_chinese_cased_en_5.1.2_3.0_1694772709372.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_spanish_chinese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_spanish_chinese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_spanish_chinese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|281.7 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-es-zh-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_italian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_italian_cased_en.md new file mode 100644 index 00000000000000..132dccba1d6e08 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_italian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_spanish_italian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_spanish_italian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_spanish_italian_cased` is an English model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_italian_cased_en_5.1.2_3.0_1694772463123.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_italian_cased_en_5.1.2_3.0_1694772463123.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_spanish_italian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_spanish_italian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_spanish_italian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|272.0 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-es-it-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_portuguese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_portuguese_cased_en.md new file mode 100644 index 00000000000000..e97b5e4fd7e93a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_spanish_portuguese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_spanish_portuguese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_spanish_portuguese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_spanish_portuguese_cased` is an English model originally trained by Geotrend. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_portuguese_cased_en_5.1.2_3.0_1694772586206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_spanish_portuguese_cased_en_5.1.2_3.0_1694772586206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_spanish_portuguese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_spanish_portuguese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_spanish_portuguese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|268.4 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-es-pt-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_swahili_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_swahili_cased_en.md new file mode 100644 index 00000000000000..126f16b2350ba3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_swahili_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_swahili_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_swahili_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_swahili_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_swahili_cased_en_5.1.2_3.0_1694775528652.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_swahili_cased_en_5.1.2_3.0_1694775528652.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_swahili_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_swahili_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_swahili_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-sw-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_thai_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_thai_cased_en.md new file mode 100644 index 00000000000000..67daf96a0fd159 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_thai_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_thai_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_thai_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_thai_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_thai_cased_en_5.1.2_3.0_1694775652118.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_thai_cased_en_5.1.2_3.0_1694775652118.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_thai_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_thai_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_thai_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.7 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-th-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_turkish_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_turkish_cased_en.md new file mode 100644 index 00000000000000..ed2d8baf7aa504 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_turkish_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_turkish_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_turkish_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_turkish_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_turkish_cased_en_5.1.2_3.0_1694775755844.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_turkish_cased_en_5.1.2_3.0_1694775755844.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_turkish_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_turkish_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_turkish_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|251.1 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-tr-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_ukrainian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_ukrainian_cased_en.md new file mode 100644 index 00000000000000..86887587730333 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_ukrainian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_ukrainian_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_ukrainian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_ukrainian_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_ukrainian_cased_en_5.1.2_3.0_1694775863815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_ukrainian_cased_en_5.1.2_3.0_1694775863815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_ukrainian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_ukrainian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_ukrainian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|262.9 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-uk-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_urdu_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_urdu_cased_en.md new file mode 100644 index 00000000000000..f552acc2b40c20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_urdu_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_urdu_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_urdu_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_urdu_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_urdu_cased_en_5.1.2_3.0_1694775970383.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_urdu_cased_en_5.1.2_3.0_1694775970383.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_urdu_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_urdu_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_urdu_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|251.1 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-ur-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_vietnamese_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_vietnamese_cased_en.md new file mode 100644 index 00000000000000..db1dac48cd374a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_english_vietnamese_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_english_vietnamese_cased DistilBertEmbeddings from Geotrend +author: John Snow Labs +name: distilbert_base_english_vietnamese_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_english_vietnamese_cased` is an English model originally trained by Geotrend. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_english_vietnamese_cased_en_5.1.2_3.0_1694776082127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_english_vietnamese_cased_en_5.1.2_3.0_1694776082127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_english_vietnamese_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_english_vietnamese_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_english_vietnamese_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|251.3 MB| + +## References + +https://huggingface.co/Geotrend/distilbert-base-en-vi-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_food_review_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_food_review_en.md new file mode 100644 index 00000000000000..e19e3205926823 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_food_review_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_food_review DistilBertEmbeddings from breakjl +author: John Snow Labs +name: distilbert_base_food_review +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_food_review` is an English model originally trained by breakjl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_food_review_en_5.1.2_3.0_1694781544212.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_food_review_en_5.1.2_3.0_1694781544212.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_food_review","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_food_review", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_food_review| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/breakjl/distilbert-base-food_review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_german_cased_de.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_german_cased_de.md new file mode 100644 index 00000000000000..09b6d2a1fe6097 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_german_cased_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German distilbert_base_german_cased DistilBertEmbeddings from huggingface +author: John Snow Labs +name: distilbert_base_german_cased +date: 2023-09-15 +tags: [distilbert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_german_cased` is a German model originally trained by huggingface. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_de_5.1.2_3.0_1694769969382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_de_5.1.2_3.0_1694769969382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_german_cased","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_german_cased", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_german_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|250.3 MB| + +## References + +https://huggingface.co/distilbert-base-german-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_german_cased_finetuned_amazon_reviews_de.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_german_cased_finetuned_amazon_reviews_de.md new file mode 100644 index 00000000000000..ad8b9c47eb6bcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_german_cased_finetuned_amazon_reviews_de.md @@ -0,0 +1,93 @@ +--- +layout: model +title: German distilbert_base_german_cased_finetuned_amazon_reviews DistilBertEmbeddings from mariav +author: John Snow Labs +name: distilbert_base_german_cased_finetuned_amazon_reviews +date: 2023-09-15 +tags: [distilbert, de, open_source, fill_mask, onnx] +task: Embeddings +language: de +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_german_cased_finetuned_amazon_reviews` is a German model originally trained by mariav. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_finetuned_amazon_reviews_de_5.1.2_3.0_1694774283101.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_german_cased_finetuned_amazon_reviews_de_5.1.2_3.0_1694774283101.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_german_cased_finetuned_amazon_reviews","de") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_german_cased_finetuned_amazon_reviews", "de") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_german_cased_finetuned_amazon_reviews| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|de| +|Size:|250.3 MB| + +## References + +https://huggingface.co/mariav/distilbert-base-german-cased-finetuned-amazon-reviews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_indonesian_id.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_indonesian_id.md new file mode 100644 index 00000000000000..6ffbb1a7dfb19c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_indonesian_id.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Indonesian distilbert_base_indonesian DistilBertEmbeddings from cahya +author: John Snow Labs +name: distilbert_base_indonesian +date: 2023-09-15 +tags: [distilbert, id, open_source, fill_mask, onnx] +task: Embeddings +language: id +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_indonesian` is an Indonesian model originally trained by cahya. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_indonesian_id_5.1.2_3.0_1694780162345.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_indonesian_id_5.1.2_3.0_1694780162345.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_indonesian","id") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_indonesian", "id") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_indonesian| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|id| +|Size:|253.0 MB| + +## References + +https://huggingface.co/cahya/distilbert-base-indonesian \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_multilingual_cased_bulgarian_wikipedia_xx.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_multilingual_cased_bulgarian_wikipedia_xx.md new file mode 100644 index 00000000000000..013ad0c084893a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_multilingual_cased_bulgarian_wikipedia_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_bulgarian_wikipedia DistilBertEmbeddings from mor40 +author: John Snow Labs +name: distilbert_base_multilingual_cased_bulgarian_wikipedia +date: 2023-09-15 +tags: [distilbert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_bulgarian_wikipedia` is a Multilingual model originally trained by mor40. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_bulgarian_wikipedia_xx_5.1.2_3.0_1694784992621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_bulgarian_wikipedia_xx_5.1.2_3.0_1694784992621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_bulgarian_wikipedia","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_multilingual_cased_bulgarian_wikipedia", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_bulgarian_wikipedia| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|505.3 MB| + +## References + +https://huggingface.co/mor40/distilbert-base-multilingual-cased-bg-wikipedia \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_multilingual_cased_finetuned_kintweetse_xx.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_multilingual_cased_finetuned_kintweetse_xx.md new file mode 100644 index 00000000000000..9205b7afab9408 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_multilingual_cased_finetuned_kintweetse_xx.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Multilingual distilbert_base_multilingual_cased_finetuned_kintweetse DistilBertEmbeddings from RogerB +author: John Snow Labs +name: distilbert_base_multilingual_cased_finetuned_kintweetse +date: 2023-09-15 +tags: [distilbert, xx, open_source, fill_mask, onnx] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_multilingual_cased_finetuned_kintweetse` is a Multilingual model originally trained by RogerB. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_kintweetse_xx_5.1.2_3.0_1694773789090.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_multilingual_cased_finetuned_kintweetse_xx_5.1.2_3.0_1694773789090.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_multilingual_cased_finetuned_kintweetse","xx") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_multilingual_cased_finetuned_kintweetse", "xx") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_multilingual_cased_finetuned_kintweetse| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|505.4 MB| + +## References + +https://huggingface.co/RogerB/distilbert-base-multilingual-cased-finetuned-kintweetsE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_es.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_es.md new file mode 100644 index 00000000000000..c91d1b9b7089d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_es.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Castilian, Spanish distilbert_base_spanish_uncased DistilBertEmbeddings from dccuchile +author: John Snow Labs +name: distilbert_base_spanish_uncased +date: 2023-09-15 +tags: [distilbert, es, open_source, fill_mask, onnx] +task: Embeddings +language: es +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_spanish_uncased` is a Castilian, Spanish model originally trained by dccuchile. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_es_5.1.2_3.0_1694770464754.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_es_5.1.2_3.0_1694770464754.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased","es") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_spanish_uncased", "es") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|es| +|Size:|250.2 MB| + +## References + +https://huggingface.co/dccuchile/distilbert-base-spanish-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_finetuned_amazon_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_finetuned_amazon_en.md new file mode 100644 index 00000000000000..1a929d4ef1b1b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_finetuned_amazon_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_spanish_uncased_finetuned_amazon DistilBertEmbeddings from Marianoleiras +author: John Snow Labs +name: distilbert_base_spanish_uncased_finetuned_amazon +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_spanish_uncased_finetuned_amazon` is an English model originally trained by Marianoleiras.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_amazon_en_5.1.2_3.0_1694777013368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_amazon_en_5.1.2_3.0_1694777013368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased_finetuned_amazon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_spanish_uncased_finetuned_amazon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased_finetuned_amazon| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/Marianoleiras/distilbert-base-spanish-uncased-finetuned-amazon \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_finetuned_clinais_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_finetuned_clinais_en.md new file mode 100644 index 00000000000000..2180d32468bab9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_finetuned_clinais_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_spanish_uncased_finetuned_clinais DistilBertEmbeddings from joheras +author: John Snow Labs +name: distilbert_base_spanish_uncased_finetuned_clinais +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_spanish_uncased_finetuned_clinais` is an English model originally trained by joheras.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_clinais_en_5.1.2_3.0_1694774679930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_finetuned_clinais_en_5.1.2_3.0_1694774679930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased_finetuned_clinais","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_spanish_uncased_finetuned_clinais", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased_finetuned_clinais| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/joheras/distilbert-base-spanish-uncased-finetuned-clinais \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_model_en.md new file mode 100644 index 00000000000000..7154505408b50d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_spanish_uncased_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_spanish_uncased_model DistilBertEmbeddings from AleNunezArroyo +author: John Snow Labs +name: distilbert_base_spanish_uncased_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_spanish_uncased_model` is an English model originally trained by AleNunezArroyo. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_model_en_5.1.2_3.0_1694786476451.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_spanish_uncased_model_en_5.1.2_3.0_1694786476451.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_spanish_uncased_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_spanish_uncased_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_spanish_uncased_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/AleNunezArroyo/distilbert-base-spanish-uncased-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_turkish_cased_offensive_mlm_tr.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_turkish_cased_offensive_mlm_tr.md new file mode 100644 index 00000000000000..73eaf8472f9710 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_turkish_cased_offensive_mlm_tr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Turkish distilbert_base_turkish_cased_offensive_mlm DistilBertEmbeddings from Overfit-GM +author: John Snow Labs +name: distilbert_base_turkish_cased_offensive_mlm +date: 2023-09-15 +tags: [distilbert, tr, open_source, fill_mask, onnx] +task: Embeddings +language: tr +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_base_turkish_cased_offensive_mlm` is a Turkish model originally trained by Overfit-GM. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_turkish_cased_offensive_mlm_tr_5.1.2_3.0_1694777986744.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_turkish_cased_offensive_mlm_tr_5.1.2_3.0_1694777986744.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_turkish_cased_offensive_mlm","tr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_turkish_cased_offensive_mlm", "tr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_turkish_cased_offensive_mlm| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|tr| +|Size:|251.8 MB| + +## References + +https://huggingface.co/Overfit-GM/distilbert-base-turkish-cased-offensive-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_aisera_texts_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_aisera_texts_en.md new file mode 100644 index 00000000000000..98e8bbc60c2f76 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_aisera_texts_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_aisera_texts DistilBertEmbeddings from Theimisa +author: John Snow Labs +name: distilbert_base_uncased_aisera_texts +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_aisera_texts` is an English model originally trained by Theimisa. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_en_5.1.2_3.0_1694771537071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_en_5.1.2_3.0_1694771537071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_aisera_texts","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_aisera_texts", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_aisera_texts| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Theimisa/distilbert-base-uncased-aisera_texts \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_aisera_texts_v3_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_aisera_texts_v3_en.md new file mode 100644 index 00000000000000..41c888ba9aa83a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_aisera_texts_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_aisera_texts_v3 DistilBertEmbeddings from Theimisa +author: John Snow Labs +name: distilbert_base_uncased_aisera_texts_v3 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_aisera_texts_v3` is an English model originally trained by Theimisa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_v3_en_5.1.2_3.0_1694771955323.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_aisera_texts_v3_en_5.1.2_3.0_1694771955323.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_aisera_texts_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_aisera_texts_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_aisera_texts_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Theimisa/distilbert-base-uncased-aisera_texts-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_arxiv_abstracts_10k_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_arxiv_abstracts_10k_en.md new file mode 100644 index 00000000000000..244d68acd24e74 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_arxiv_abstracts_10k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_arxiv_abstracts_10k DistilBertEmbeddings from timetoai +author: John Snow Labs +name: distilbert_base_uncased_arxiv_abstracts_10k +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_arxiv_abstracts_10k` is an English model originally trained by timetoai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_arxiv_abstracts_10k_en_5.1.2_3.0_1694786407387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_arxiv_abstracts_10k_en_5.1.2_3.0_1694786407387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_arxiv_abstracts_10k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_arxiv_abstracts_10k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_arxiv_abstracts_10k| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/timetoai/distilbert-base-uncased-arxiv-abstracts-10k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_bert_yoga_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_bert_yoga_finetuned_en.md new file mode 100644 index 00000000000000..dd84bcaf8d1de4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_bert_yoga_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_bert_yoga_finetuned DistilBertEmbeddings from dsantistevan +author: John Snow Labs +name: distilbert_base_uncased_bert_yoga_finetuned +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_bert_yoga_finetuned` is an English model originally trained by dsantistevan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_bert_yoga_finetuned_en_5.1.2_3.0_1694782584370.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_bert_yoga_finetuned_en_5.1.2_3.0_1694782584370.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_bert_yoga_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_bert_yoga_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_bert_yoga_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dsantistevan/distilbert-base-uncased-bert-yoga-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_cohl_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_cohl_en.md new file mode 100644 index 00000000000000..20254050842eb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_cohl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_cohl DistilBertEmbeddings from shafin +author: John Snow Labs +name: distilbert_base_uncased_cohl +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_cohl` is an English model originally trained by shafin. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_cohl_en_5.1.2_3.0_1694789103082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_cohl_en_5.1.2_3.0_1694789103082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_cohl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_cohl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_cohl| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shafin/distilbert-base-uncased-cohl \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned2_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned2_imdb_en.md new file mode 100644 index 00000000000000..192a81712410a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned2_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned2_imdb DistilBertEmbeddings from Ghost1 +author: John Snow Labs +name: distilbert_base_uncased_finetuned2_imdb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned2_imdb` is an English model originally trained by Ghost1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned2_imdb_en_5.1.2_3.0_1694770185421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned2_imdb_en_5.1.2_3.0_1694770185421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned2_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned2_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned2_imdb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ghost1/distilbert-base-uncased-finetuned2-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_allocation_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_allocation_en.md new file mode 100644 index 00000000000000..408432551a1b7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_allocation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_allocation DistilBertEmbeddings from shubham7 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_allocation +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_allocation` is an English model originally trained by shubham7.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_allocation_en_5.1.2_3.0_1694789203683.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_allocation_en_5.1.2_3.0_1694789203683.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_allocation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_allocation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_allocation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/shubham7/distilbert-base-uncased-finetuned-allocation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_amazon_review_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_amazon_review_en.md new file mode 100644 index 00000000000000..61cd2d11f30b99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_amazon_review_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_amazon_review DistilBertEmbeddings from soyisauce +author: John Snow Labs +name: distilbert_base_uncased_finetuned_amazon_review +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_amazon_review` is an English model originally trained by soyisauce.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_amazon_review_en_5.1.2_3.0_1694790068018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_amazon_review_en_5.1.2_3.0_1694790068018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_amazon_review","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_amazon_review", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_amazon_review| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/soyisauce/distilbert-base-uncased-finetuned-amazon_review \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ccnews_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ccnews_en.md new file mode 100644 index 00000000000000..c592f2cc267708 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ccnews_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ccnews DistilBertEmbeddings from mchalek +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ccnews +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_ccnews` is an English model originally trained by mchalek. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ccnews_en_5.1.2_3.0_1694790900189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ccnews_en_5.1.2_3.0_1694790900189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_ccnews","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_ccnews", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ccnews| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mchalek/distilbert-base-uncased-finetuned-ccnews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_en.md new file mode 100644 index 00000000000000..6a364cc025f64c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_char DistilBertEmbeddings from bhagasra-saurav +author: John Snow Labs +name: distilbert_base_uncased_finetuned_char +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_char` is an English model originally trained by bhagasra-saurav. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_en_5.1.2_3.0_1694784715338.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_en_5.1.2_3.0_1694784715338.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_char","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_char", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_char| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bhagasra-saurav/distilbert-base-uncased-finetuned-char \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v1_en.md new file mode 100644 index 00000000000000..c2bd6fd8bcad8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_char_v1 DistilBertEmbeddings from bhagasra-saurav +author: John Snow Labs +name: distilbert_base_uncased_finetuned_char_v1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_char_v1` is an English model originally trained by bhagasra-saurav.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_v1_en_5.1.2_3.0_1694785235023.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_v1_en_5.1.2_3.0_1694785235023.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_char_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_char_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_char_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhagasra-saurav/distilbert-base-uncased-finetuned-char-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v2_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v2_en.md new file mode 100644 index 00000000000000..66095204915f14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_char_v2 DistilBertEmbeddings from bhagasra-saurav +author: John Snow Labs +name: distilbert_base_uncased_finetuned_char_v2 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_char_v2` is an English model originally trained by bhagasra-saurav.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_v2_en_5.1.2_3.0_1694785386613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_v2_en_5.1.2_3.0_1694785386613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_char_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_char_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_char_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/bhagasra-saurav/distilbert-base-uncased-finetuned-char-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v3_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v3_en.md new file mode 100644 index 00000000000000..9224f204e3d4a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_char_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_char_v3 DistilBertEmbeddings from bhagasra-saurav +author: John Snow Labs +name: distilbert_base_uncased_finetuned_char_v3 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_char_v3` is an English model originally trained by bhagasra-saurav.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_v3_en_5.1.2_3.0_1694785507806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_char_v3_en_5.1.2_3.0_1694785507806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_char_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_char_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_char_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bhagasra-saurav/distilbert-base-uncased-finetuned-char-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_civi_cooments_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_civi_cooments_accelerate_en.md new file mode 100644 index 00000000000000..6849c3f4264606 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_civi_cooments_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_civi_cooments_accelerate DistilBertEmbeddings from SmartPy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_civi_cooments_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_civi_cooments_accelerate` is an English model originally trained by SmartPy.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_civi_cooments_accelerate_en_5.1.2_3.0_1694772882674.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_civi_cooments_accelerate_en_5.1.2_3.0_1694772882674.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_civi_cooments_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_civi_cooments_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_civi_cooments_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SmartPy/distilbert-base-uncased-finetuned-civi-cooments-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_clinc150_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_clinc150_en.md new file mode 100644 index 00000000000000..62008c1952d486 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_clinc150_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_clinc150 DistilBertEmbeddings from FilippoComastri +author: John Snow Labs +name: distilbert_base_uncased_finetuned_clinc150 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_clinc150` is an English model originally trained by FilippoComastri.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc150_en_5.1.2_3.0_1694780894423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_clinc150_en_5.1.2_3.0_1694780894423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_clinc150","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_clinc150", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_clinc150| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/FilippoComastri/distilbert-base-uncased-finetuned-clinc150 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cnn_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cnn_en.md new file mode 100644 index 00000000000000..1dd8748f045f58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cnn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cnn DistilBertEmbeddings from SmartPy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cnn +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_cnn` is an English model originally trained by SmartPy. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cnn_en_5.1.2_3.0_1694783567189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cnn_en_5.1.2_3.0_1694783567189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_cnn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_cnn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cnn| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SmartPy/distilbert-base-uncased-finetuned-cnn \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_crypto_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_crypto_en.md new file mode 100644 index 00000000000000..73aaf1d6d8970c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_crypto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_crypto DistilBertEmbeddings from smarquie +author: John Snow Labs +name: distilbert_base_uncased_finetuned_crypto +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_crypto` is an English model originally trained by smarquie. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_crypto_en_5.1.2_3.0_1694780201008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_crypto_en_5.1.2_3.0_1694780201008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_crypto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_crypto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_crypto| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/smarquie/distilbert-base-uncased-finetuned-crypto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ct_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ct_en.md new file mode 100644 index 00000000000000..00d315670b1fed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ct_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ct DistilBertEmbeddings from anthonyyazdani +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ct +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_ct` is an English model originally trained by anthonyyazdani. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ct_en_5.1.2_3.0_1694771385621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ct_en_5.1.2_3.0_1694771385621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_ct","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_ct", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ct| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/anthonyyazdani/distilbert-base-uncased-finetuned-CT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cust_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cust_en.md new file mode 100644 index 00000000000000..5ded3ab0043fd3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cust_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cust DistilBertEmbeddings from shafin +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cust +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_cust` is an English model originally trained by shafin. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cust_en_5.1.2_3.0_1694773996911.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cust_en_5.1.2_3.0_1694773996911.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_cust","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_cust", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cust| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shafin/distilbert-base-uncased-finetuned-cust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cvent_2019_2022_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cvent_2019_2022_en.md new file mode 100644 index 00000000000000..b85ccee1867399 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cvent_2019_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cvent_2019_2022 DistilBertEmbeddings from vives +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cvent_2019_2022 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_cvent_2019_2022` is an English model originally trained by vives.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cvent_2019_2022_en_5.1.2_3.0_1694773096960.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cvent_2019_2022_en_5.1.2_3.0_1694773096960.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_cvent_2019_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_cvent_2019_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cvent_2019_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vives/distilbert-base-uncased-finetuned-cvent-2019_2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cvent_2022_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cvent_2022_en.md new file mode 100644 index 00000000000000..aa6933e92831f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_cvent_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_cvent_2022 DistilBertEmbeddings from vives +author: John Snow Labs +name: distilbert_base_uncased_finetuned_cvent_2022 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_cvent_2022` is an English model originally trained by vives.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cvent_2022_en_5.1.2_3.0_1694772739443.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_cvent_2022_en_5.1.2_3.0_1694772739443.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_cvent_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_cvent_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_cvent_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vives/distilbert-base-uncased-finetuned-cvent-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_dis_mlm5_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_dis_mlm5_en.md new file mode 100644 index 00000000000000..76160242691f13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_dis_mlm5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_dis_mlm5 DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_dis_mlm5 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_dis_mlm5` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dis_mlm5_en_5.1.2_3.0_1694781655821.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_dis_mlm5_en_5.1.2_3.0_1694781655821.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_dis_mlm5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_dis_mlm5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +</div>
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_dis_mlm5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-DIS-mlm5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_discord_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_discord_en.md new file mode 100644 index 00000000000000..8f57fc772ed09d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_discord_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_discord DistilBertEmbeddings from Phroggu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_discord +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_discord` is an English model originally trained by Phroggu. + +{:.btn-box} +<button class="button button-orange" disabled>Live Demo</button> +<button class="button button-orange" disabled>Open in Colab</button> +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_discord_en_5.1.2_3.0_1694782495752.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_discord_en_5.1.2_3.0_1694782495752.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +<div class="tabs-box" markdown="1">
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_discord","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_discord", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_discord| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Phroggu/distilbert-base-uncased-finetuned-discord \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_domain_adaptation_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_domain_adaptation_en.md new file mode 100644 index 00000000000000..b9678c9f78f3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_domain_adaptation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_domain_adaptation DistilBertEmbeddings from algiraldohe +author: John Snow Labs +name: distilbert_base_uncased_finetuned_domain_adaptation +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_domain_adaptation` is an English model originally trained by algiraldohe.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_domain_adaptation_en_5.1.2_3.0_1694770565017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_domain_adaptation_en_5.1.2_3.0_1694770565017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_domain_adaptation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_domain_adaptation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_domain_adaptation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/algiraldohe/distilbert-base-uncased-finetuned-domain-adaptation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_fetch_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_fetch_en.md new file mode 100644 index 00000000000000..156d63873a6b60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_fetch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_fetch DistilBertEmbeddings from rtreptow +author: John Snow Labs +name: distilbert_base_uncased_finetuned_fetch +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_fetch` is an English model originally trained by rtreptow. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_fetch_en_5.1.2_3.0_1694792165514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_fetch_en_5.1.2_3.0_1694792165514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_fetch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_fetch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_fetch| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rtreptow/distilbert-base-uncased-finetuned-fetch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_game_accelerate_v2_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_game_accelerate_v2_en.md new file mode 100644 index 00000000000000..e7d87711bde367 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_game_accelerate_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_game_accelerate_v2 DistilBertEmbeddings from kaiku03 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_game_accelerate_v2 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_game_accelerate_v2` is an English model originally trained by kaiku03.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_game_accelerate_v2_en_5.1.2_3.0_1694789931049.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_game_accelerate_v2_en_5.1.2_3.0_1694789931049.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_game_accelerate_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_game_accelerate_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_game_accelerate_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kaiku03/distilbert-base-uncased-finetuned-game-accelerate_V2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_en.md new file mode 100644 index 00000000000000..dce335a6e8a9d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_auto DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_auto +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_auto` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_en_5.1.2_3.0_1694780256193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_en_5.1.2_3.0_1694780256193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_auto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_auto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_auto| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani_auto \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_gen_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_gen_en.md new file mode 100644 index 00000000000000..80b471a8a49cce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_gen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_auto_gen DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_auto_gen +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_auto_gen` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_gen_en_5.1.2_3.0_1694780783124.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_gen_en_5.1.2_3.0_1694780783124.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_auto_gen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_auto_gen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_auto_gen| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani_auto-gen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_text_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_text_en.md new file mode 100644 index 00000000000000..c24c8aa3930423 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_text_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_auto_text DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_auto_text +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_auto_text` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_text_en_5.1.2_3.0_1694780380500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_text_en_5.1.2_3.0_1694780380500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_auto_text","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_auto_text", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_auto_text| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani_auto-TEXT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_text_gen_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_text_gen_en.md new file mode 100644 index 00000000000000..764413b2f1274f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_text_gen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_auto_text_gen DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_auto_text_gen +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_auto_text_gen` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_text_gen_en_5.1.2_3.0_1694780620171.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_text_gen_en_5.1.2_3.0_1694780620171.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_auto_text_gen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_auto_text_gen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_auto_text_gen| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani_auto-text-gen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_textgeneration_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_textgeneration_en.md new file mode 100644 index 00000000000000..a2b0235a2c909f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_auto_textgeneration_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_auto_textgeneration DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_auto_textgeneration +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_auto_textgeneration` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_textgeneration_en_5.1.2_3.0_1694780500428.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_auto_textgeneration_en_5.1.2_3.0_1694780500428.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_auto_textgeneration","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_auto_textgeneration", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_auto_textgeneration| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani_auto-TEXTgeneration \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_gen_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_gen_en.md new file mode 100644 index 00000000000000..a4254b89309dad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_gen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_gen DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_gen +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_gen` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_gen_en_5.1.2_3.0_1694780899595.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_gen_en_5.1.2_3.0_1694780899595.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_gen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_gen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_gen| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani-gen \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_m_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_m_en.md new file mode 100644 index 00000000000000..8d3e1610cd4e11 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_himani_m_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_himani_m DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_himani_m +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_himani_m` is an English model originally trained by himanimaheshwari3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_m_en_5.1.2_3.0_1694781279459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_himani_m_en_5.1.2_3.0_1694781279459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_himani_m","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_himani_m", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_himani_m| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-himani-m \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_hina_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_hina_en.md new file mode 100644 index 00000000000000..3d3ce4fea0c69f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_hina_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_hina DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_hina +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_hina` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_hina_en_5.1.2_3.0_1694780144155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_hina_en_5.1.2_3.0_1694780144155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_hina","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_hina", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_hina| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-hina \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_homedepot_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_homedepot_en.md new file mode 100644 index 00000000000000..ee78b576460382 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_homedepot_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_homedepot DistilBertEmbeddings from Ukhushn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_homedepot +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_homedepot` is an English model originally trained by Ukhushn. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_homedepot_en_5.1.2_3.0_1694773345195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_homedepot_en_5.1.2_3.0_1694773345195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_homedepot","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_homedepot", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_homedepot| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ukhushn/distilbert-base-uncased-finetuned-homedepot \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_im_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_im_en.md new file mode 100644 index 00000000000000..60c44348217532 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_im_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_im DistilBertEmbeddings from shahriarebrampour +author: John Snow Labs +name: distilbert_base_uncased_finetuned_im +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_im` is an English model originally trained by shahriarebrampour. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_im_en_5.1.2_3.0_1694788569896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_im_en_5.1.2_3.0_1694788569896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_im","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_im", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_im| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shahriarebrampour/distilbert-base-uncased-finetuned-im \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb1_en.md new file mode 100644 index 00000000000000..5126fed5979abf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb1 DistilBertEmbeddings from pulkitkumar13 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb1` is an English model originally trained by pulkitkumar13. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb1_en_5.1.2_3.0_1694787665201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb1_en_5.1.2_3.0_1694787665201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pulkitkumar13/distilbert-base-uncased-finetuned-imdb1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_anikaai_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_anikaai_en.md new file mode 100644 index 00000000000000..85514f63f942d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_anikaai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_anikaai DistilBertEmbeddings from AnikaAI +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_anikaai +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_anikaai` is an English model originally trained by AnikaAI. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_anikaai_en_5.1.2_3.0_1694789143868.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_anikaai_en_5.1.2_3.0_1694789143868.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_anikaai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_anikaai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_anikaai| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/AnikaAI/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang_en.md new file mode 100644 index 00000000000000..bde3235d5932a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang DistilBertEmbeddings from Arthuerwang +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang` is an English model originally trained by Arthuerwang. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang_en_5.1.2_3.0_1694783071999.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang_en_5.1.2_3.0_1694783071999.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_arthuerwang| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Arthuerwang/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy_en.md new file mode 100644 index 00000000000000..45255bc0e2c683 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy DistilBertEmbeddings from averageandyyy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy` is an English model originally trained by averageandyyy. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy_en_5.1.2_3.0_1694776377229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy_en_5.1.2_3.0_1694776377229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_averageandyyy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/averageandyyy/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_binaryy_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_binaryy_en.md new file mode 100644 index 00000000000000..c4baa5b2af5634 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_binaryy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_binaryy DistilBertEmbeddings from Binaryy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_binaryy +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_binaryy` is an English model originally trained by Binaryy. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_binaryy_en_5.1.2_3.0_1694783198520.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_binaryy_en_5.1.2_3.0_1694783198520.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_binaryy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_binaryy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_binaryy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Binaryy/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze_en.md new file mode 100644 index 00000000000000..9c83bd18214daf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze DistilBertEmbeddings from caroline-betbeze +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze` is an English model originally trained by caroline-betbeze. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze_en_5.1.2_3.0_1694782749314.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze_en_5.1.2_3.0_1694782749314.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_caroline_betbeze| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/caroline-betbeze/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_cchychen_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_cchychen_en.md new file mode 100644 index 00000000000000..382a9dbe2a65dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_cchychen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_cchychen DistilBertEmbeddings from Cchychen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_cchychen +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_cchychen` is an English model originally trained by Cchychen. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_cchychen_en_5.1.2_3.0_1694786049897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_cchychen_en_5.1.2_3.0_1694786049897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_cchychen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_cchychen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_cchychen| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Cchychen/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh_en.md new file mode 100644 index 00000000000000..7c345712b2ccc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh DistilBertEmbeddings from chenxingphh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh` is an English model originally trained by chenxingphh. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh_en_5.1.2_3.0_1694771845243.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh_en_5.1.2_3.0_1694771845243.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token")  # supplies the "token" column the embeddings stage reads + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // supplies the "token" column the embeddings stage reads + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_chenxingphh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/chenxingphh/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db_en.md new file mode 100644 index 00000000000000..334f5cd53e2113 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db DistilBertEmbeddings from coreyabs-db +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db` is an English model originally trained by coreyabs-db. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db_en_5.1.2_3.0_1694781387285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db_en_5.1.2_3.0_1694781387285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_coreyabs_db| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/coreyabs-db/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams_en.md new file mode 100644 index 00000000000000..be0d71be38ae57 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams DistilBertEmbeddings from DelusionalDreams +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams` is an English model originally trained by DelusionalDreams. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams_en_5.1.2_3.0_1694772163662.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams_en_5.1.2_3.0_1694772163662.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_delusionaldreams| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/DelusionalDreams/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_dewa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_dewa_en.md new file mode 100644 index 00000000000000..c8e5d61dfef8ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_dewa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_dewa DistilBertEmbeddings from Dewa +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_dewa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_dewa` is an English model originally trained by Dewa. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_dewa_en_5.1.2_3.0_1694777452620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_dewa_en_5.1.2_3.0_1694777452620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_dewa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_dewa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_dewa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dewa/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_dmlea_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_dmlea_en.md new file mode 100644 index 00000000000000..5b4fa23b7f93a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_dmlea_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_dmlea DistilBertEmbeddings from dmlea +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_dmlea +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_dmlea` is an English model originally trained by dmlea. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_dmlea_en_5.1.2_3.0_1694790142387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_dmlea_en_5.1.2_3.0_1694790142387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_dmlea","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_dmlea", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_dmlea| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dmlea/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk_en.md new file mode 100644 index 00000000000000..3d028683b975b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk DistilBertEmbeddings from elhamagk +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk` is an English model originally trained by elhamagk. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk_en_5.1.2_3.0_1694778234781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk_en_5.1.2_3.0_1694778234781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_elhamagk| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/elhamagk/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_emergix_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_emergix_en.md new file mode 100644 index 00000000000000..c314c1d6a4000e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_emergix_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_emergix DistilBertEmbeddings from emergix +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_emergix +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_emergix` is an English model originally trained by emergix. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_emergix_en_5.1.2_3.0_1694776509650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_emergix_en_5.1.2_3.0_1694776509650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_emergix","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_emergix", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_emergix| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/emergix/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_evincent18_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_evincent18_en.md new file mode 100644 index 00000000000000..6bf4a3c0e91d36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_evincent18_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_evincent18 DistilBertEmbeddings from evincent18 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_evincent18 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_evincent18` is an English model originally trained by evincent18. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_evincent18_en_5.1.2_3.0_1694771587269.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_evincent18_en_5.1.2_3.0_1694771587269.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_evincent18","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_evincent18", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_evincent18| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/evincent18/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi_en.md new file mode 100644 index 00000000000000..197be0f2a3fb30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi DistilBertEmbeddings from fadliaulawi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi` is an English model originally trained by fadliaulawi. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi_en_5.1.2_3.0_1694775951105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi_en_5.1.2_3.0_1694775951105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_fadliaulawi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fadliaulawi/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_francesc_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_francesc_en.md new file mode 100644 index 00000000000000..675ac5f986fe1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_francesc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_francesc DistilBertEmbeddings from Francesc +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_francesc +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_francesc` is an English model originally trained by Francesc. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_francesc_en_5.1.2_3.0_1694787169835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_francesc_en_5.1.2_3.0_1694787169835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_francesc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_francesc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_francesc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Francesc/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ghost1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ghost1_en.md new file mode 100644 index 00000000000000..4220b4ef365618 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ghost1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_ghost1 DistilBertEmbeddings from Ghost1 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_ghost1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_ghost1` is an English model originally trained by Ghost1. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ghost1_en_5.1.2_3.0_1694770280423.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ghost1_en_5.1.2_3.0_1694770280423.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ghost1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ghost1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_ghost1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ghost1/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_golightly_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_golightly_en.md new file mode 100644 index 00000000000000..262d8869f48e30 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_golightly_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_golightly DistilBertEmbeddings from golightly +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_golightly +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_golightly` is an English model originally trained by golightly. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_golightly_en_5.1.2_3.0_1694790895420.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_golightly_en_5.1.2_3.0_1694790895420.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_golightly","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_golightly", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_golightly| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/golightly/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_gyronee_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_gyronee_en.md new file mode 100644 index 00000000000000..0a1097aa2f652b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_gyronee_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_gyronee DistilBertEmbeddings from gyronee +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_gyronee +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_gyronee` is an English model originally trained by gyronee.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_gyronee_en_5.1.2_3.0_1694778898355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_gyronee_en_5.1.2_3.0_1694778898355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_gyronee","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_gyronee", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_gyronee| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gyronee/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira_en.md new file mode 100644 index 00000000000000..ddb2c158e6c39e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira DistilBertEmbeddings from hilariooliveira +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira` is an English model originally trained by hilariooliveira.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira_en_5.1.2_3.0_1694791662511.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira_en_5.1.2_3.0_1694791662511.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_hilariooliveira| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hilariooliveira/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan_en.md new file mode 100644 index 00000000000000..78b51cd3e879dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan DistilBertEmbeddings from hsiehpinghan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan` is an English model originally trained by hsiehpinghan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan_en_5.1.2_3.0_1694771644406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan_en_5.1.2_3.0_1694771644406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_hsiehpinghan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hsiehpinghan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_icity_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_icity_en.md new file mode 100644 index 00000000000000..ed3134fc2b2935 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_icity_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_icity DistilBertEmbeddings from icity +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_icity +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_icity` is an English model originally trained by icity.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_icity_en_5.1.2_3.0_1694770827492.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_icity_en_5.1.2_3.0_1694770827492.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_icity","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_icity", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_icity| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/icity/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim_en.md new file mode 100644 index 00000000000000..b42c32039f9035 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim DistilBertEmbeddings from JakeYunwooKim +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim` is an English model originally trained by JakeYunwooKim.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim_en_5.1.2_3.0_1694786783189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim_en_5.1.2_3.0_1694786783189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jakeyunwookim| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JakeYunwooKim/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jcai1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jcai1_en.md new file mode 100644 index 00000000000000..e7b062a92e71a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jcai1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jcai1 DistilBertEmbeddings from jcai1 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jcai1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_jcai1` is an English model originally trained by jcai1.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jcai1_en_5.1.2_3.0_1694771220693.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jcai1_en_5.1.2_3.0_1694771220693.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jcai1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jcai1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jcai1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jcai1/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jdang_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jdang_en.md new file mode 100644 index 00000000000000..8a483e649a4d0a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jdang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jdang DistilBertEmbeddings from jdang +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jdang +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_jdang` is an English model originally trained by jdang.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jdang_en_5.1.2_3.0_1694778406635.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jdang_en_5.1.2_3.0_1694778406635.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jdang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jdang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jdang| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jdang/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman_en.md new file mode 100644 index 00000000000000..ba8fc381e23e65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman DistilBertEmbeddings from JJinBBangMan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman` is an English model originally trained by JJinBBangMan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman_en_5.1.2_3.0_1694787149226.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman_en_5.1.2_3.0_1694787149226.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jjinbbangman| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JJinBBangMan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk_en.md new file mode 100644 index 00000000000000..920e66d668be9d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk DistilBertEmbeddings from johnyyhk +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk` is an English model originally trained by johnyyhk.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk_en_5.1.2_3.0_1694785389324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk_en_5.1.2_3.0_1694785389324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_johnyyhk| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/johnyyhk/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter_en.md new file mode 100644 index 00000000000000..d7c4c082430879 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter DistilBertEmbeddings from jordanblatter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter` is an English model originally trained by jordanblatter.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter_en_5.1.2_3.0_1694783143004.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter_en_5.1.2_3.0_1694783143004.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jordanblatter| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jordanblatter/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jwchung_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jwchung_en.md new file mode 100644 index 00000000000000..092eb224e14fc9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_jwchung_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_jwchung DistilBertEmbeddings from jwchung +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_jwchung +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_jwchung` is an English model originally trained by jwchung. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jwchung_en_5.1.2_3.0_1694774483686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_jwchung_en_5.1.2_3.0_1694774483686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jwchung","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_jwchung", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_jwchung| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jwchung/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825_en.md new file mode 100644 index 00000000000000..544093deb279cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825 DistilBertEmbeddings from larryboy825 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825` is an English model originally trained by larryboy825. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825_en_5.1.2_3.0_1694775543945.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825_en_5.1.2_3.0_1694775543945.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_larryboy825| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/larryboy825/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_lewtun_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_lewtun_en.md new file mode 100644 index 00000000000000..b6851fc15b528a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_lewtun_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_lewtun DistilBertEmbeddings from lewtun +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_lewtun +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_lewtun` is an English model originally trained by lewtun. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_lewtun_en_5.1.2_3.0_1694781429360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_lewtun_en_5.1.2_3.0_1694781429360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_lewtun","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_lewtun", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_lewtun| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lewtun/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_liquannan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_liquannan_en.md new file mode 100644 index 00000000000000..45619a606744cb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_liquannan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_liquannan DistilBertEmbeddings from liquannan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_liquannan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_liquannan` is an English model originally trained by liquannan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_liquannan_en_5.1.2_3.0_1694770331398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_liquannan_en_5.1.2_3.0_1694770331398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_liquannan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_liquannan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_liquannan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/liquannan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_luzimu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_luzimu_en.md new file mode 100644 index 00000000000000..07af92acd37c4e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_luzimu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_luzimu DistilBertEmbeddings from luzimu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_luzimu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_luzimu` is an English model originally trained by luzimu. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_luzimu_en_5.1.2_3.0_1694783067580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_luzimu_en_5.1.2_3.0_1694783067580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_luzimu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_luzimu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_luzimu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/luzimu/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013_en.md new file mode 100644 index 00000000000000..def64ecc5b5171 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013 DistilBertEmbeddings from lyk0013 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013` is an English model originally trained by lyk0013. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013_en_5.1.2_3.0_1694790463669.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013_en_5.1.2_3.0_1694790463669.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_lyk0013| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lyk0013/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh_en.md new file mode 100644 index 00000000000000..a9542906b549d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh DistilBertEmbeddings from magnustragardh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh` is an English model originally trained by magnustragardh. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh_en_5.1.2_3.0_1694784482897.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh_en_5.1.2_3.0_1694784482897.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_magnustragardh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/magnustragardh/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8_en.md new file mode 100644 index 00000000000000..2b3e5da9589bc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8 DistilBertEmbeddings from Mascariddu8 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8` is an English model originally trained by Mascariddu8. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8_en_5.1.2_3.0_1694779288740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8_en_5.1.2_3.0_1694779288740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_mascariddu8| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Mascariddu8/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mbateman_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mbateman_en.md new file mode 100644 index 00000000000000..22a0f28c52b9a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mbateman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_mbateman DistilBertEmbeddings from mbateman +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_mbateman +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_mbateman` is an English model originally trained by mbateman. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_mbateman_en_5.1.2_3.0_1694781933141.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_mbateman_en_5.1.2_3.0_1694781933141.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_mbateman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_mbateman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_mbateman| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mbateman/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mulinski_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mulinski_en.md new file mode 100644 index 00000000000000..dd2d6c101a66db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_mulinski_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_mulinski DistilBertEmbeddings from mulinski +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_mulinski +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_mulinski` is an English model originally trained by mulinski. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_mulinski_en_5.1.2_3.0_1694776279605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_mulinski_en_5.1.2_3.0_1694776279605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_mulinski","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_mulinski", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_mulinski| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mulinski/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri_en.md new file mode 100644 index 00000000000000..2775e40d77279a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri DistilBertEmbeddings from pavle-tsotskolauri +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri` is an English model originally trained by pavle-tsotskolauri. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri_en_5.1.2_3.0_1694771323246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri_en_5.1.2_3.0_1694771323246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_pavle_tsotskolauri| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pavle-tsotskolauri/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu_en.md new file mode 100644 index 00000000000000..ebf6dd4c7409e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu DistilBertEmbeddings from peterhsu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu` is an English model originally trained by peterhsu. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu_en_5.1.2_3.0_1694783408621.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu_en_5.1.2_3.0_1694783408621.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_peterhsu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peterhsu/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_physhunter_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_physhunter_en.md new file mode 100644 index 00000000000000..cf2aef600f03a9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_physhunter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_physhunter DistilBertEmbeddings from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_physhunter +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_physhunter` is an English model originally trained by PhysHunter. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_physhunter_en_5.1.2_3.0_1694773235874.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_physhunter_en_5.1.2_3.0_1694773235874.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_physhunter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_physhunter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_physhunter| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88_en.md new file mode 100644 index 00000000000000..34649d44d2013b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88 DistilBertEmbeddings from qianyu88 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88` is an English model originally trained by qianyu88. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88_en_5.1.2_3.0_1694791575645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88_en_5.1.2_3.0_1694791575645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_qianyu88| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/qianyu88/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka_en.md new file mode 100644 index 00000000000000..da7a86034d5757 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka DistilBertEmbeddings from RajkNakka +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka` is an English model originally trained by RajkNakka. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka_en_5.1.2_3.0_1694789718500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka_en_5.1.2_3.0_1694789718500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_rajknakka| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RajkNakka/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn_en.md new file mode 100644 index 00000000000000..ff062179b335a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn DistilBertEmbeddings from rdvdsn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn` is an English model originally trained by rdvdsn. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn_en_5.1.2_3.0_1694786749532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn_en_5.1.2_3.0_1694786749532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_rdvdsn| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rdvdsn/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rugo_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rugo_en.md new file mode 100644 index 00000000000000..3958d058e5c25f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rugo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_rugo DistilBertEmbeddings from rugo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_rugo +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_rugo` is an English model originally trained by rugo. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rugo_en_5.1.2_3.0_1694783338406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rugo_en_5.1.2_3.0_1694783338406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rugo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rugo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_rugo| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rugo/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh_en.md new file mode 100644 index 00000000000000..f3ac492a2c078a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh DistilBertEmbeddings from Rushikesh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh` is an English model originally trained by Rushikesh. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh_en_5.1.2_3.0_1694782340471.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh_en_5.1.2_3.0_1694782340471.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_rushikesh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Rushikesh/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai_en.md new file mode 100644 index 00000000000000..e5c2a2540731da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai DistilBertEmbeddings from ryanlai +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai` is an English model originally trained by ryanlai. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai_en_5.1.2_3.0_1694787087716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai_en_5.1.2_3.0_1694787087716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_ryanlai| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ryanlai/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44_en.md new file mode 100644 index 00000000000000..a6de82726a327b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44 DistilBertEmbeddings from sarthakc44 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44` is an English model originally trained by sarthakc44. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44_en_5.1.2_3.0_1694773431276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44_en_5.1.2_3.0_1694773431276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_sarthakc44| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sarthakc44/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41_en.md new file mode 100644 index 00000000000000..5671f78b475aa1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41 DistilBertEmbeddings from ShadowTwin41 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41` is an English model originally trained by ShadowTwin41. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41_en_5.1.2_3.0_1694787894599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41_en_5.1.2_3.0_1694787894599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_shadowtwin41| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ShadowTwin41/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_spasis_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_spasis_en.md new file mode 100644 index 00000000000000..59dd4caf3680e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_spasis_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_spasis DistilBertEmbeddings from spasis +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_spasis +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_spasis` is an English model originally trained by spasis.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_spasis_en_5.1.2_3.0_1694787046946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_spasis_en_5.1.2_3.0_1694787046946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_spasis","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_spasis", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_spasis| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/spasis/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71_en.md new file mode 100644 index 00000000000000..168ad312f748ca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71 DistilBertEmbeddings from sungchun71 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71` is an English model originally trained by sungchun71.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71_en_5.1.2_3.0_1694774001255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71_en_5.1.2_3.0_1694774001255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_sungchun71| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sungchun71/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_surjray_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_surjray_en.md new file mode 100644 index 00000000000000..50675e3b7168be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_surjray_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_surjray DistilBertEmbeddings from surjray +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_surjray +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_surjray` is an English model originally trained by surjray.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_surjray_en_5.1.2_3.0_1694778413933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_surjray_en_5.1.2_3.0_1694778413933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_surjray","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_surjray", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_surjray| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/surjray/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_talhaa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_talhaa_en.md new file mode 100644 index 00000000000000..bb2995994b8b52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_talhaa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_talhaa DistilBertEmbeddings from talhaa +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_talhaa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_talhaa` is an English model originally trained by talhaa.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_talhaa_en_5.1.2_3.0_1694786575831.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_talhaa_en_5.1.2_3.0_1694786575831.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_talhaa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_talhaa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_talhaa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/talhaa/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan_en.md new file mode 100644 index 00000000000000..2f71074cd131bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan DistilBertEmbeddings from tanvirkhan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan` is an English model originally trained by tanvirkhan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan_en_5.1.2_3.0_1694785375336.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan_en_5.1.2_3.0_1694785375336.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_tanvirkhan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tanvirkhan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_terps_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_terps_en.md new file mode 100644 index 00000000000000..c0c74f701c926f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_terps_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_terps DistilBertEmbeddings from Terps +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_terps +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_terps` is an English model originally trained by Terps.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_terps_en_5.1.2_3.0_1694790594312.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_terps_en_5.1.2_3.0_1694790594312.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_terps","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_terps", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_terps| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Terps/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_thangvip_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_thangvip_en.md new file mode 100644 index 00000000000000..63313f6803eaca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_thangvip_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_thangvip DistilBertEmbeddings from thangvip +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_thangvip +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_thangvip` is an English model originally trained by thangvip.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_thangvip_en_5.1.2_3.0_1694791316149.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_thangvip_en_5.1.2_3.0_1694791316149.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_thangvip","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_thangvip", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_thangvip| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/thangvip/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi_en.md new file mode 100644 index 00000000000000..2e448cb14dad75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi DistilBertEmbeddings from ThetaPhiPsi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi` is an English model originally trained by ThetaPhiPsi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi_en_5.1.2_3.0_1694781360414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi_en_5.1.2_3.0_1694781360414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_thetaphipsi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ThetaPhiPsi/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_threite_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_threite_en.md new file mode 100644 index 00000000000000..3904d3683f92ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_threite_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_threite DistilBertEmbeddings from threite +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_threite +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_threite` is an English model originally trained by threite.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_threite_en_5.1.2_3.0_1694788276759.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_threite_en_5.1.2_3.0_1694788276759.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_threite","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_threite", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_threite| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/threite/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_timtl_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_timtl_en.md new file mode 100644 index 00000000000000..66aae5f7a59396 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_timtl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_timtl DistilBertEmbeddings from TimTL +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_timtl +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_timtl` is an English model originally trained by TimTL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_timtl_en_5.1.2_3.0_1694787470542.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_timtl_en_5.1.2_3.0_1694787470542.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_timtl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_timtl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_timtl| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/TimTL/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama_en.md new file mode 100644 index 00000000000000..9f848af5608b19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama DistilBertEmbeddings from tkoyama +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama` is an English model originally trained by tkoyama. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama_en_5.1.2_3.0_1694788742865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama_en_5.1.2_3.0_1694788742865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_tkoyama| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tkoyama/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev_en.md new file mode 100644 index 00000000000000..8ea35a9d79670e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev DistilBertEmbeddings from tsobolev +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev` is an English model originally trained by tsobolev. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev_en_5.1.2_3.0_1694789508240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev_en_5.1.2_3.0_1694789508240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_tsobolev| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tsobolev/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic_en.md new file mode 100644 index 00000000000000..51cdd81703037a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic DistilBertEmbeddings from ttmusic +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic` is an English model originally trained by ttmusic. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic_en_5.1.2_3.0_1694786680946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic_en_5.1.2_3.0_1694786680946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_ttmusic| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ttmusic/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_udoy_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_udoy_en.md new file mode 100644 index 00000000000000..ae85c58cf729ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_udoy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_udoy DistilBertEmbeddings from Udoy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_udoy +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_udoy` is an English model originally trained by Udoy. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_udoy_en_5.1.2_3.0_1694787044089.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_udoy_en_5.1.2_3.0_1694787044089.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_udoy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_udoy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_udoy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Udoy/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan_en.md new file mode 100644 index 00000000000000..9e9e16648533d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan DistilBertEmbeddings from VanHoan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan` is an English model originally trained by VanHoan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan_en_5.1.2_3.0_1694784218282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan_en_5.1.2_3.0_1694784218282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_vanhoan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/VanHoan/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vives_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vives_en.md new file mode 100644 index 00000000000000..0fa2a9ebe6b44c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vives_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_vives DistilBertEmbeddings from vives +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_vives +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_vives` is an English model originally trained by vives. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_vives_en_5.1.2_3.0_1694772625562.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_vives_en_5.1.2_3.0_1694772625562.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_vives","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_vives", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_vives| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vives/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas_en.md new file mode 100644 index 00000000000000..353f294c12a3b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas DistilBertEmbeddings from vsrinivas +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas` is an English model originally trained by vsrinivas. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas_en_5.1.2_3.0_1694788081981.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas_en_5.1.2_3.0_1694788081981.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_vsrinivas| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vsrinivas/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng_en.md new file mode 100644 index 00000000000000..f55dcf70c098ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng DistilBertEmbeddings from wangmiaobeng +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng` is an English model originally trained by wangmiaobeng. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng_en_5.1.2_3.0_1694790638480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng_en_5.1.2_3.0_1694790638480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_wangmiaobeng| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/wangmiaobeng/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu_en.md new file mode 100644 index 00000000000000..62724f9be9198a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu DistilBertEmbeddings from WayneChiu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu` is an English model originally trained by WayneChiu. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu_en_5.1.2_3.0_1694788869882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu_en_5.1.2_3.0_1694788869882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_waynechiu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/WayneChiu/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_winson_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_winson_en.md new file mode 100644 index 00000000000000..4769f7d193d153 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_winson_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_winson DistilBertEmbeddings from winson +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_winson +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_winson` is an English model originally trained by winson. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_winson_en_5.1.2_3.0_1694776770579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_winson_en_5.1.2_3.0_1694776770579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_winson","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_winson", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_winson| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/winson/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_xkang_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_xkang_en.md new file mode 100644 index 00000000000000..4e0213c75e57b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_xkang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_xkang DistilBertEmbeddings from xkang +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_xkang +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_xkang` is an English model originally trained by xkang.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_xkang_en_5.1.2_3.0_1694785805536.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_xkang_en_5.1.2_3.0_1694785805536.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_xkang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_xkang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_xkang| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/xkang/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko_en.md new file mode 100644 index 00000000000000..4f460dbcbda848 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko DistilBertEmbeddings from yangwooko +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko` is an English model originally trained by yangwooko.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko_en_5.1.2_3.0_1694777159804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko_en_5.1.2_3.0_1694777159804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_yangwooko| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/yangwooko/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_yuto01_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_yuto01_en.md new file mode 100644 index 00000000000000..38b5cea503fb62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_accelerate_yuto01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_accelerate_yuto01 DistilBertEmbeddings from Yuto01 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_accelerate_yuto01 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_accelerate_yuto01` is an English model originally trained by Yuto01.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_yuto01_en_5.1.2_3.0_1694789597628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_accelerate_yuto01_en_5.1.2_3.0_1694789597628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_yuto01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_accelerate_yuto01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_accelerate_yuto01| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Yuto01/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_akashmaggon_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_akashmaggon_en.md new file mode 100644 index 00000000000000..d074260299e72b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_akashmaggon_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_akashmaggon DistilBertEmbeddings from akashmaggon +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_akashmaggon +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_akashmaggon` is an English model originally trained by akashmaggon.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_akashmaggon_en_5.1.2_3.0_1694786916828.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_akashmaggon_en_5.1.2_3.0_1694786916828.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_akashmaggon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_akashmaggon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_akashmaggon| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/akashmaggon/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_akazad_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_akazad_en.md new file mode 100644 index 00000000000000..c92d705e708910 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_akazad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_akazad DistilBertEmbeddings from akazad +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_akazad +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_akazad` is an English model originally trained by akazad.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_akazad_en_5.1.2_3.0_1694789344568.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_akazad_en_5.1.2_3.0_1694789344568.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_akazad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_akazad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_akazad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/akazad/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_andrewr_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_andrewr_en.md new file mode 100644 index 00000000000000..6caaef76de79df --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_andrewr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_andrewr DistilBertEmbeddings from AndrewR +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_andrewr +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_andrewr` is an English model originally trained by AndrewR.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_andrewr_en_5.1.2_3.0_1694790355579.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_andrewr_en_5.1.2_3.0_1694790355579.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_andrewr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_andrewr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_andrewr| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/AndrewR/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_anikaai_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_anikaai_en.md new file mode 100644 index 00000000000000..cd332606000491 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_anikaai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_anikaai DistilBertEmbeddings from AnikaAI +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_anikaai +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_anikaai` is an English model originally trained by AnikaAI.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_anikaai_en_5.1.2_3.0_1694789041659.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_anikaai_en_5.1.2_3.0_1694789041659.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_anikaai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_anikaai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_anikaai| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/AnikaAI/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_anthonyyazdani_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_anthonyyazdani_en.md new file mode 100644 index 00000000000000..deccf083caa8ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_anthonyyazdani_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_anthonyyazdani DistilBertEmbeddings from anthonyyazdani +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_anthonyyazdani +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_anthonyyazdani` is an English model originally trained by anthonyyazdani.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_anthonyyazdani_en_5.1.2_3.0_1694771270494.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_anthonyyazdani_en_5.1.2_3.0_1694771270494.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_anthonyyazdani","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_anthonyyazdani", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_anthonyyazdani| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/anthonyyazdani/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_aoill_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_aoill_en.md new file mode 100644 index 00000000000000..dee27866081dae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_aoill_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_aoill DistilBertEmbeddings from aoill +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_aoill +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_aoill` is an English model originally trained by aoill.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_aoill_en_5.1.2_3.0_1694775321830.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_aoill_en_5.1.2_3.0_1694775321830.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_aoill","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_aoill", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_aoill| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/aoill/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_apatidar0_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_apatidar0_en.md new file mode 100644 index 00000000000000..5195f31d24a28d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_apatidar0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_apatidar0 DistilBertEmbeddings from apatidar0 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_apatidar0 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_apatidar0` is an English model originally trained by apatidar0.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_apatidar0_en_5.1.2_3.0_1694789699531.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_apatidar0_en_5.1.2_3.0_1694789699531.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_apatidar0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_apatidar0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_apatidar0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/apatidar0/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arjun9689_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arjun9689_en.md new file mode 100644 index 00000000000000..c07f8a4f293a7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arjun9689_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_arjun9689 DistilBertEmbeddings from arjun9689 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_arjun9689 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_arjun9689` is an English model originally trained by arjun9689.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_arjun9689_en_5.1.2_3.0_1694777211475.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_arjun9689_en_5.1.2_3.0_1694777211475.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_arjun9689","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_arjun9689", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_arjun9689| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/arjun9689/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arthuerwang_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arthuerwang_en.md new file mode 100644 index 00000000000000..c58b8fd2284322 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arthuerwang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_arthuerwang DistilBertEmbeddings from Arthuerwang +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_arthuerwang +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_arthuerwang` is an English model originally trained by Arthuerwang. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_arthuerwang_en_5.1.2_3.0_1694782618860.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_arthuerwang_en_5.1.2_3.0_1694782618860.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_arthuerwang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_arthuerwang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_arthuerwang| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Arthuerwang/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arunadiraju_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arunadiraju_en.md new file mode 100644 index 00000000000000..665da3a304af55 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_arunadiraju_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_arunadiraju DistilBertEmbeddings from arunadiraju +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_arunadiraju +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_arunadiraju` is an English model originally trained by arunadiraju. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_arunadiraju_en_5.1.2_3.0_1694779661933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_arunadiraju_en_5.1.2_3.0_1694779661933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_arunadiraju","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_arunadiraju", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_arunadiraju| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/arunadiraju/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_at2507_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_at2507_en.md new file mode 100644 index 00000000000000..e3722b6b69c050 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_at2507_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_at2507 DistilBertEmbeddings from at2507 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_at2507 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_at2507` is an English model originally trained by at2507. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_at2507_en_5.1.2_3.0_1694772852025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_at2507_en_5.1.2_3.0_1694772852025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_at2507","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_at2507", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_at2507| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/at2507/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_atiiisham988_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_atiiisham988_en.md new file mode 100644 index 00000000000000..06e68f572a0e5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_atiiisham988_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_atiiisham988 DistilBertEmbeddings from atiiisham988 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_atiiisham988 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_atiiisham988` is an English model originally trained by atiiisham988. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_atiiisham988_en_5.1.2_3.0_1694772504810.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_atiiisham988_en_5.1.2_3.0_1694772504810.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_atiiisham988","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_atiiisham988", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_atiiisham988| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/atiiisham988/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_averageandyyy_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_averageandyyy_en.md new file mode 100644 index 00000000000000..c785d4e81bc0c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_averageandyyy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_averageandyyy DistilBertEmbeddings from averageandyyy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_averageandyyy +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_averageandyyy` is an English model originally trained by averageandyyy. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_averageandyyy_en_5.1.2_3.0_1694776247161.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_averageandyyy_en_5.1.2_3.0_1694776247161.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_averageandyyy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_averageandyyy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_averageandyyy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/averageandyyy/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_bigearhututu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_bigearhututu_en.md new file mode 100644 index 00000000000000..8be5c1fca2c171 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_bigearhututu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_bigearhututu DistilBertEmbeddings from bigearhututu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_bigearhututu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_bigearhututu` is an English model originally trained by bigearhututu. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_bigearhututu_en_5.1.2_3.0_1694775113143.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_bigearhututu_en_5.1.2_3.0_1694775113143.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_bigearhututu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_bigearhututu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_bigearhututu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bigearhututu/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_brenton_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_brenton_en.md new file mode 100644 index 00000000000000..882b5cfb3e4a5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_brenton_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_brenton DistilBertEmbeddings from brenton +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_brenton +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_brenton` is an English model originally trained by brenton. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_brenton_en_5.1.2_3.0_1694787537664.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_brenton_en_5.1.2_3.0_1694787537664.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_brenton","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_brenton", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_brenton| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/brenton/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_bubblejoe_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_bubblejoe_en.md new file mode 100644 index 00000000000000..95708a24fcafd8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_bubblejoe_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_bubblejoe DistilBertEmbeddings from BubbleJoe +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_bubblejoe +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_bubblejoe` is an English model originally trained by BubbleJoe. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_bubblejoe_en_5.1.2_3.0_1694782227859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_bubblejoe_en_5.1.2_3.0_1694782227859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_bubblejoe","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_bubblejoe", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_bubblejoe| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BubbleJoe/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_caroline_betbeze_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_caroline_betbeze_en.md new file mode 100644 index 00000000000000..757fdc53be5b41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_caroline_betbeze_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_caroline_betbeze DistilBertEmbeddings from caroline-betbeze +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_caroline_betbeze +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_caroline_betbeze` is an English model originally trained by caroline-betbeze. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_caroline_betbeze_en_5.1.2_3.0_1694782648414.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_caroline_betbeze_en_5.1.2_3.0_1694782648414.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_caroline_betbeze","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_caroline_betbeze", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_caroline_betbeze| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/caroline-betbeze/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cartinoe5930_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cartinoe5930_en.md new file mode 100644 index 00000000000000..e859e17b692462 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cartinoe5930_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cartinoe5930 DistilBertEmbeddings from Cartinoe5930 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cartinoe5930 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cartinoe5930` is an English model originally trained by Cartinoe5930. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cartinoe5930_en_5.1.2_3.0_1694784085673.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cartinoe5930_en_5.1.2_3.0_1694784085673.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cartinoe5930","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_cartinoe5930", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cartinoe5930| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Cartinoe5930/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cchychen_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cchychen_en.md new file mode 100644 index 00000000000000..cf78c891438e85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cchychen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cchychen DistilBertEmbeddings from Cchychen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cchychen +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cchychen` is an English model originally trained by Cchychen.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cchychen_en_5.1.2_3.0_1694786158859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cchychen_en_5.1.2_3.0_1694786158859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cchychen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_cchychen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cchychen| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Cchychen/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_chenxingphh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_chenxingphh_en.md new file mode 100644 index 00000000000000..14a89ef3a0f937 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_chenxingphh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_chenxingphh DistilBertEmbeddings from chenxingphh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_chenxingphh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_chenxingphh` is an English model originally trained by chenxingphh.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_chenxingphh_en_5.1.2_3.0_1694771729067.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_chenxingphh_en_5.1.2_3.0_1694771729067.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_chenxingphh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_chenxingphh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_chenxingphh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/chenxingphh/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cindymc_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cindymc_en.md new file mode 100644 index 00000000000000..64f068843d732a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cindymc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cindymc DistilBertEmbeddings from cindymc +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cindymc +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cindymc` is an English model originally trained by cindymc.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cindymc_en_5.1.2_3.0_1694786805038.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cindymc_en_5.1.2_3.0_1694786805038.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cindymc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_cindymc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cindymc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/cindymc/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cl_wood_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cl_wood_en.md new file mode 100644 index 00000000000000..5ce33cfc4167bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cl_wood_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cl_wood DistilBertEmbeddings from cl-wood +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cl_wood +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cl_wood` is an English model originally trained by cl-wood.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cl_wood_en_5.1.2_3.0_1694770937006.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cl_wood_en_5.1.2_3.0_1694770937006.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cl_wood","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_cl_wood", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cl_wood| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/cl-wood/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cleandata_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cleandata_en.md new file mode 100644 index 00000000000000..038ccccc225a70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cleandata_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cleandata DistilBertEmbeddings from cleandata +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cleandata +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cleandata` is an English model originally trained by cleandata.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cleandata_en_5.1.2_3.0_1694787303361.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cleandata_en_5.1.2_3.0_1694787303361.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cleandata","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_cleandata", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cleandata| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/cleandata/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_codeplay_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_codeplay_en.md new file mode 100644 index 00000000000000..65603cc5098af2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_codeplay_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_codeplay DistilBertEmbeddings from codeplay +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_codeplay +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_codeplay` is an English model originally trained by codeplay.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_codeplay_en_5.1.2_3.0_1694787263429.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_codeplay_en_5.1.2_3.0_1694787263429.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_codeplay","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_codeplay", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_codeplay| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/codeplay/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_coreyabs_db_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_coreyabs_db_en.md new file mode 100644 index 00000000000000..4d0585f4f3aa94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_coreyabs_db_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_coreyabs_db DistilBertEmbeddings from coreyabs-db +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_coreyabs_db +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_coreyabs_db` is an English model originally trained by coreyabs-db.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_coreyabs_db_en_5.1.2_3.0_1694781268724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_coreyabs_db_en_5.1.2_3.0_1694781268724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_coreyabs_db","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_coreyabs_db", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_coreyabs_db| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/coreyabs-db/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cssupport_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cssupport_en.md new file mode 100644 index 00000000000000..8b1b618b1fe744 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_cssupport_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_cssupport DistilBertEmbeddings from cssupport +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_cssupport +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_cssupport` is an English model originally trained by cssupport.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cssupport_en_5.1.2_3.0_1694781174746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_cssupport_en_5.1.2_3.0_1694781174746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_cssupport","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_cssupport", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_cssupport| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/cssupport/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dave_sheets_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dave_sheets_en.md new file mode 100644 index 00000000000000..6f5258d356e46f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dave_sheets_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dave_sheets DistilBertEmbeddings from Dave-Sheets +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dave_sheets +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_dave_sheets` is an English model originally trained by Dave-Sheets.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dave_sheets_en_5.1.2_3.0_1694774885308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dave_sheets_en_5.1.2_3.0_1694774885308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dave_sheets","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_dave_sheets", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dave_sheets| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dave-Sheets/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_debug_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_debug_en.md new file mode 100644 index 00000000000000..1c2260c9ed02fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_debug_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_debug DistilBertEmbeddings from lewtun +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_debug +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_debug` is an English model originally trained by lewtun.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_debug_en_5.1.2_3.0_1694785359537.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_debug_en_5.1.2_3.0_1694785359537.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_debug","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_debug", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_debug| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lewtun/distilbert-base-uncased-finetuned-imdb-debug \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_delusionaldreams_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_delusionaldreams_en.md new file mode 100644 index 00000000000000..30583928480413 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_delusionaldreams_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_delusionaldreams DistilBertEmbeddings from DelusionalDreams +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_delusionaldreams +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_delusionaldreams` is an English model originally trained by DelusionalDreams.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_delusionaldreams_en_5.1.2_3.0_1694772062647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_delusionaldreams_en_5.1.2_3.0_1694772062647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_delusionaldreams","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_delusionaldreams", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_delusionaldreams| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/DelusionalDreams/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dewa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dewa_en.md new file mode 100644 index 00000000000000..f0bf012d043671 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dewa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dewa DistilBertEmbeddings from Dewa +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dewa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_dewa` is an English model originally trained by Dewa.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dewa_en_5.1.2_3.0_1694777331208.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dewa_en_5.1.2_3.0_1694777331208.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dewa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_dewa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dewa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dewa/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dieexbr_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dieexbr_en.md new file mode 100644 index 00000000000000..c0fc37a5751a9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dieexbr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dieexbr DistilBertEmbeddings from dieexbr +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dieexbr +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_dieexbr` is an English model originally trained by dieexbr.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dieexbr_en_5.1.2_3.0_1694771700734.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dieexbr_en_5.1.2_3.0_1694771700734.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dieexbr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_dieexbr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dieexbr| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dieexbr/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dipika09_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dipika09_en.md new file mode 100644 index 00000000000000..9454ae3c2c31f5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dipika09_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dipika09 DistilBertEmbeddings from Dipika09 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dipika09 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_dipika09` is an English model originally trained by Dipika09.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dipika09_en_5.1.2_3.0_1694789723756.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dipika09_en_5.1.2_3.0_1694789723756.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dipika09","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_dipika09", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dipika09| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Dipika09/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dmlea_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dmlea_en.md new file mode 100644 index 00000000000000..bcc0834ba821ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dmlea_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dmlea DistilBertEmbeddings from dmlea +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dmlea +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_dmlea` is an English model originally trained by dmlea.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dmlea_en_5.1.2_3.0_1694790044524.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dmlea_en_5.1.2_3.0_1694790044524.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dmlea","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_dmlea", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dmlea| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dmlea/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_drdspace_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_drdspace_en.md new file mode 100644 index 00000000000000..dbd7a7611ec4b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_drdspace_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_drdspace DistilBertEmbeddings from drdspace +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_drdspace +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_drdspace` is an English model originally trained by drdspace.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_drdspace_en_5.1.2_3.0_1694783734326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_drdspace_en_5.1.2_3.0_1694783734326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_drdspace","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_drdspace", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_drdspace| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/drdspace/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dshvetsov_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dshvetsov_en.md new file mode 100644 index 00000000000000..855ea38f4559aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_dshvetsov_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_dshvetsov DistilBertEmbeddings from dshvetsov +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_dshvetsov +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_dshvetsov` is an English model originally trained by dshvetsov.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dshvetsov_en_5.1.2_3.0_1694782739474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_dshvetsov_en_5.1.2_3.0_1694782739474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_dshvetsov","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_dshvetsov", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_dshvetsov| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dshvetsov/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ecosystem_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ecosystem_en.md new file mode 100644 index 00000000000000..4f12510ac207f2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ecosystem_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ecosystem DistilBertEmbeddings from ecosystem +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ecosystem +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ecosystem` is an English model originally trained by ecosystem.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ecosystem_en_5.1.2_3.0_1694776959436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ecosystem_en_5.1.2_3.0_1694776959436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ecosystem","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_ecosystem", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ecosystem| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ecosystem/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_edraper88_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_edraper88_en.md new file mode 100644 index 00000000000000..7f067a64f33b16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_edraper88_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_edraper88 DistilBertEmbeddings from edraper88 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_edraper88 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_edraper88` is an English model originally trained by edraper88.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_edraper88_en_5.1.2_3.0_1694791144038.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_edraper88_en_5.1.2_3.0_1694791144038.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_edraper88","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_edraper88", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_edraper88| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/edraper88/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_eitanli_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_eitanli_en.md new file mode 100644 index 00000000000000..7b9b7ffbf63ba6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_eitanli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_eitanli DistilBertEmbeddings from Eitanli +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_eitanli +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_eitanli` is an English model originally trained by Eitanli.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_eitanli_en_5.1.2_3.0_1694785816192.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_eitanli_en_5.1.2_3.0_1694785816192.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_eitanli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_eitanli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_eitanli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Eitanli/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_elggman_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_elggman_en.md new file mode 100644 index 00000000000000..6fc02e83c10865 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_elggman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_elggman DistilBertEmbeddings from ELggman +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_elggman +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_elggman` is an English model originally trained by ELggman.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_elggman_en_5.1.2_3.0_1694784844116.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_elggman_en_5.1.2_3.0_1694784844116.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_elggman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_elggman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_elggman| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ELggman/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_eusojk_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_eusojk_en.md new file mode 100644 index 00000000000000..e4096b978b26cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_eusojk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_eusojk DistilBertEmbeddings from eusojk +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_eusojk +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_eusojk` is an English model originally trained by eusojk.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_eusojk_en_5.1.2_3.0_1694784211104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_eusojk_en_5.1.2_3.0_1694784211104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_eusojk", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_eusojk", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_eusojk| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/eusojk/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_evincent18_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_evincent18_en.md new file mode 100644 index 00000000000000..fcf7eb07b7d19d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_evincent18_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_evincent18 DistilBertEmbeddings from evincent18 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_evincent18 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_evincent18` is an English model originally trained by evincent18.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_evincent18_en_5.1.2_3.0_1694771481378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_evincent18_en_5.1.2_3.0_1694771481378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_evincent18", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_evincent18", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_evincent18| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/evincent18/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_fadliaulawi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_fadliaulawi_en.md new file mode 100644 index 00000000000000..23f3b87d5a0281 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_fadliaulawi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_fadliaulawi DistilBertEmbeddings from fadliaulawi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_fadliaulawi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_fadliaulawi` is an English model originally trained by fadliaulawi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_fadliaulawi_en_5.1.2_3.0_1694775718136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_fadliaulawi_en_5.1.2_3.0_1694775718136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_fadliaulawi", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_fadliaulawi", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_fadliaulawi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fadliaulawi/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_feeeper_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_feeeper_en.md new file mode 100644 index 00000000000000..e1ef4d25ccb1a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_feeeper_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_feeeper DistilBertEmbeddings from feeeper +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_feeeper +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_feeeper` is an English model originally trained by feeeper.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_feeeper_en_5.1.2_3.0_1694778391045.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_feeeper_en_5.1.2_3.0_1694778391045.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_feeeper", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_feeeper", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_feeeper| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/feeeper/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin_en.md new file mode 100644 index 00000000000000..2e0723b6dccd75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin DistilBertEmbeddings from chenyanjin +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin` is an English model originally trained by chenyanjin.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin_en_5.1.2_3.0_1694783843728.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin_en_5.1.2_3.0_1694783843728.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_finetuned_imdb_chenyanjin| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/chenyanjin/distilbert-base-uncased-finetuned-imdb-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_geolearner_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_geolearner_en.md new file mode 100644 index 00000000000000..52165837706ebe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_geolearner_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_geolearner DistilBertEmbeddings from geolearner +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_geolearner +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_geolearner` is an English model originally trained by geolearner.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_geolearner_en_5.1.2_3.0_1694776548569.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_geolearner_en_5.1.2_3.0_1694776548569.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_geolearner", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_geolearner", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_geolearner| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/geolearner/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_gg1313_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_gg1313_en.md new file mode 100644 index 00000000000000..c2c14b7105869c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_gg1313_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_gg1313 DistilBertEmbeddings from Gg1313 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_gg1313 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_gg1313` is an English model originally trained by Gg1313.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gg1313_en_5.1.2_3.0_1694791141518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gg1313_en_5.1.2_3.0_1694791141518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_gg1313", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_gg1313", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_gg1313| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Gg1313/distilbert-base-uncased-finetuned_imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_golightly_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_golightly_en.md new file mode 100644 index 00000000000000..c5be2b207d61bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_golightly_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_golightly DistilBertEmbeddings from golightly +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_golightly +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_golightly` is an English model originally trained by golightly.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_golightly_en_5.1.2_3.0_1694790800704.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_golightly_en_5.1.2_3.0_1694790800704.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_golightly", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_golightly", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_golightly| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/golightly/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_gtxygyzb_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_gtxygyzb_en.md new file mode 100644 index 00000000000000..74657841cfd68e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_gtxygyzb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_gtxygyzb DistilBertEmbeddings from gtxygyzb +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_gtxygyzb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_gtxygyzb` is an English model originally trained by gtxygyzb.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gtxygyzb_en_5.1.2_3.0_1694783633507.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_gtxygyzb_en_5.1.2_3.0_1694783633507.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_gtxygyzb", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_gtxygyzb", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_gtxygyzb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gtxygyzb/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_guidoivetta_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_guidoivetta_en.md new file mode 100644 index 00000000000000..f5f7a5157b5230 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_guidoivetta_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_guidoivetta DistilBertEmbeddings from guidoivetta +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_guidoivetta +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_guidoivetta` is an English model originally trained by guidoivetta.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_guidoivetta_en_5.1.2_3.0_1694788838001.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_guidoivetta_en_5.1.2_3.0_1694788838001.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_guidoivetta", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_guidoivetta", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_guidoivetta| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/guidoivetta/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_harangus_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_harangus_en.md new file mode 100644 index 00000000000000..439cdf3b1dbe0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_harangus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_harangus DistilBertEmbeddings from Harangus +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_harangus +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_harangus` is an English model originally trained by Harangus.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_harangus_en_5.1.2_3.0_1694790359797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_harangus_en_5.1.2_3.0_1694790359797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %}
+```python
+
+# `data` is expected to be a Spark DataFrame with a "text" column.
+document_assembler = DocumentAssembler() \
+    .setInputCol("text") \
+    .setOutputCol("documents")
+
+# The embeddings stage reads both "documents" and "token", so tokenize first.
+tokenizer = Tokenizer() \
+    .setInputCols(["documents"]) \
+    .setOutputCol("token")
+
+embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_harangus", "en") \
+    .setInputCols(["documents", "token"]) \
+    .setOutputCol("embeddings")
+
+pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings])
+
+pipelineModel = pipeline.fit(data)
+
+pipelineDF = pipelineModel.transform(data)
+
+```
+```scala
+
+// `data` is expected to be a Spark DataFrame with a "text" column.
+val document_assembler = new DocumentAssembler()
+    .setInputCol("text")
+    .setOutputCol("documents")
+
+// The embeddings stage reads both "documents" and "token", so tokenize first.
+val tokenizer = new Tokenizer()
+    .setInputCols(Array("documents"))
+    .setOutputCol("token")
+
+val embeddings = DistilBertEmbeddings
+    .pretrained("distilbert_base_uncased_finetuned_imdb_harangus", "en")
+    .setInputCols(Array("documents", "token"))
+    .setOutputCol("embeddings")
+
+val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings))
+
+val pipelineModel = pipeline.fit(data)
+
+val pipelineDF = pipelineModel.transform(data)
+
+```
+
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_harangus| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Harangus/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_harshseth_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_harshseth_en.md new file mode 100644 index 00000000000000..80bee2d129ddce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_harshseth_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_harshseth DistilBertEmbeddings from harshseth +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_harshseth +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_harshseth` is an English model originally trained by harshseth. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_harshseth_en_5.1.2_3.0_1694789500099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_harshseth_en_5.1.2_3.0_1694789500099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_harshseth","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_harshseth", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_harshseth| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/harshseth/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_hemanth11_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_hemanth11_en.md new file mode 100644 index 00000000000000..665fcbc5416e32 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_hemanth11_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_hemanth11 DistilBertEmbeddings from hemanth11 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_hemanth11 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_hemanth11` is an English model originally trained by hemanth11. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hemanth11_en_5.1.2_3.0_1694791749406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hemanth11_en_5.1.2_3.0_1694791749406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_hemanth11","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_hemanth11", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_hemanth11| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hemanth11/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_holtbui_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_holtbui_en.md new file mode 100644 index 00000000000000..47096c27f24d02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_holtbui_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_holtbui DistilBertEmbeddings from holtbui +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_holtbui +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_holtbui` is an English model originally trained by holtbui. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_holtbui_en_5.1.2_3.0_1694787990455.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_holtbui_en_5.1.2_3.0_1694787990455.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_holtbui","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_holtbui", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_holtbui| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/holtbui/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_huggingface_course_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_huggingface_course_en.md new file mode 100644 index 00000000000000..893e4babd3a088 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_huggingface_course_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_huggingface_course DistilBertEmbeddings from huggingface-course +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_huggingface_course +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_huggingface_course` is an English model originally trained by huggingface-course. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_huggingface_course_en_5.1.2_3.0_1694780655866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_huggingface_course_en_5.1.2_3.0_1694780655866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_huggingface_course","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_huggingface_course", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_huggingface_course| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/huggingface-course/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_hxshen_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_hxshen_en.md new file mode 100644 index 00000000000000..aa74c5c6ede594 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_hxshen_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_hxshen DistilBertEmbeddings from hxshen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_hxshen +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_hxshen` is an English model originally trained by hxshen. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hxshen_en_5.1.2_3.0_1694788924518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_hxshen_en_5.1.2_3.0_1694788924518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_hxshen","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_hxshen", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_hxshen| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hxshen/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_imxxn_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_imxxn_en.md new file mode 100644 index 00000000000000..7e47a9ed008b02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_imxxn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_imxxn DistilBertEmbeddings from Imxxn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_imxxn +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_imxxn` is an English model originally trained by Imxxn. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_imxxn_en_5.1.2_3.0_1694777338856.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_imxxn_en_5.1.2_3.0_1694777338856.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_imxxn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_imxxn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_imxxn| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Imxxn/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_insub_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_insub_en.md new file mode 100644 index 00000000000000..fde7962a8d9c00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_insub_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_insub DistilBertEmbeddings from insub +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_insub +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_insub` is an English model originally trained by insub. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_insub_en_5.1.2_3.0_1694772369747.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_insub_en_5.1.2_3.0_1694772369747.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_insub","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_insub", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_insub| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/insub/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_iotengtr_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_iotengtr_en.md new file mode 100644 index 00000000000000..b946a0f32dfe1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_iotengtr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_iotengtr DistilBertEmbeddings from iotengtr +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_iotengtr +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_iotengtr` is an English model originally trained by iotengtr. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_iotengtr_en_5.1.2_3.0_1694773541551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_iotengtr_en_5.1.2_3.0_1694773541551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_iotengtr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_iotengtr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_iotengtr| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/iotengtr/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_iven5880_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_iven5880_en.md new file mode 100644 index 00000000000000..ce21c11521f919 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_iven5880_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_iven5880 DistilBertEmbeddings from iven5880 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_iven5880 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_iven5880` is an English model originally trained by iven5880. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_iven5880_en_5.1.2_3.0_1694791025301.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_iven5880_en_5.1.2_3.0_1694791025301.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_iven5880","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_iven5880", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_iven5880| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/iven5880/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jaese_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jaese_en.md new file mode 100644 index 00000000000000..ab1c8c738d5acf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jaese_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jaese DistilBertEmbeddings from jaese +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jaese +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_jaese` is an English model originally trained by jaese. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jaese_en_5.1.2_3.0_1694791022657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jaese_en_5.1.2_3.0_1694791022657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jaese","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_jaese", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jaese| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jaese/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jake777_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jake777_en.md new file mode 100644 index 00000000000000..f1ea6bca6c5b72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jake777_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jake777 DistilBertEmbeddings from JAKE777 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jake777 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_jake777` is an English model originally trained by JAKE777. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jake777_en_5.1.2_3.0_1694777853032.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jake777_en_5.1.2_3.0_1694777853032.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jake777","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_jake777", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jake777| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JAKE777/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jchhabra_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jchhabra_en.md new file mode 100644 index 00000000000000..dbd8ea6e5bc839 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jchhabra_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jchhabra DistilBertEmbeddings from jchhabra +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jchhabra +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_jchhabra` is an English model originally trained by jchhabra. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jchhabra_en_5.1.2_3.0_1694788774737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jchhabra_en_5.1.2_3.0_1694788774737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jchhabra","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_jchhabra", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jchhabra| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jchhabra/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jjinbbangman_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jjinbbangman_en.md new file mode 100644 index 00000000000000..c3b178dfca6292 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jjinbbangman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jjinbbangman DistilBertEmbeddings from JJinBBangMan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jjinbbangman +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_jjinbbangman` is an English model originally trained by JJinBBangMan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jjinbbangman_en_5.1.2_3.0_1694787034924.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jjinbbangman_en_5.1.2_3.0_1694787034924.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jjinbbangman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_jjinbbangman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jjinbbangman| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JJinBBangMan/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_johnyyhk_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_johnyyhk_en.md new file mode 100644 index 00000000000000..4d0c68de8b954c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_johnyyhk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_johnyyhk DistilBertEmbeddings from johnyyhk +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_johnyyhk +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_johnyyhk` is an English model originally trained by johnyyhk. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_johnyyhk_en_5.1.2_3.0_1694785221188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_johnyyhk_en_5.1.2_3.0_1694785221188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_johnyyhk","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_johnyyhk", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_johnyyhk| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/johnyyhk/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_junchengding_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_junchengding_en.md new file mode 100644 index 00000000000000..1d08533d2c1d00 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_junchengding_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_junchengding DistilBertEmbeddings from JunchengDing +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_junchengding +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_junchengding` is an English model originally trained by JunchengDing. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_junchengding_en_5.1.2_3.0_1694780502980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_junchengding_en_5.1.2_3.0_1694780502980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_junchengding","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_junchengding", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_junchengding| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/JunchengDing/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jwchung_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jwchung_en.md new file mode 100644 index 00000000000000..94047821a62227 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_jwchung_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_jwchung DistilBertEmbeddings from jwchung +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_jwchung +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_jwchung` is an English model originally trained by jwchung. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jwchung_en_5.1.2_3.0_1694774375800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_jwchung_en_5.1.2_3.0_1694774375800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_jwchung","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_jwchung", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_jwchung| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jwchung/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kar1sumax_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kar1sumax_en.md new file mode 100644 index 00000000000000..0f0af95c4077d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kar1sumax_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_kar1sumax DistilBertEmbeddings from Kar1suMAX +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_kar1sumax +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_kar1sumax` is an English model originally trained by Kar1suMAX. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kar1sumax_en_5.1.2_3.0_1694771169950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kar1sumax_en_5.1.2_3.0_1694771169950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_kar1sumax","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_kar1sumax", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_kar1sumax| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Kar1suMAX/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kosec39_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kosec39_en.md new file mode 100644 index 00000000000000..8aa415a32b6c90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kosec39_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_kosec39 DistilBertEmbeddings from kosec39 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_kosec39 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_kosec39` is an English model originally trained by kosec39. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kosec39_en_5.1.2_3.0_1694771918349.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kosec39_en_5.1.2_3.0_1694771918349.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_kosec39","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_kosec39", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_kosec39| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kosec39/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ksaml_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ksaml_en.md new file mode 100644 index 00000000000000..85a16d6a391374 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ksaml_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ksaml DistilBertEmbeddings from ksaml +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ksaml +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ksaml` is an English model originally trained by ksaml. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ksaml_en_5.1.2_3.0_1694772454474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ksaml_en_5.1.2_3.0_1694772454474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ksaml","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_ksaml", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ksaml| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ksaml/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kyle2023_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kyle2023_en.md new file mode 100644 index 00000000000000..e617536c967736 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_kyle2023_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_kyle2023 DistilBertEmbeddings from kyle2023 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_kyle2023 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_kyle2023` is an English model originally trained by kyle2023. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kyle2023_en_5.1.2_3.0_1694788760520.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_kyle2023_en_5.1.2_3.0_1694788760520.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_kyle2023","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_kyle2023", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_kyle2023| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kyle2023/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lindarz_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lindarz_en.md new file mode 100644 index 00000000000000..1316363d009c86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lindarz_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_lindarz DistilBertEmbeddings from lindarz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_lindarz +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_lindarz` is an English model originally trained by lindarz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lindarz_en_5.1.2_3.0_1694771058189.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lindarz_en_5.1.2_3.0_1694771058189.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lindarz","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_lindarz", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_lindarz| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lindarz/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_liquannan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_liquannan_en.md new file mode 100644 index 00000000000000..1e789097ea533d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_liquannan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_liquannan DistilBertEmbeddings from liquannan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_liquannan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_liquannan` is an English model originally trained by liquannan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_liquannan_en_5.1.2_3.0_1694770219474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_liquannan_en_5.1.2_3.0_1694770219474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_liquannan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_liquannan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_liquannan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/liquannan/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lokeshsoni2801_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lokeshsoni2801_en.md new file mode 100644 index 00000000000000..51233de9251ce7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lokeshsoni2801_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_lokeshsoni2801 DistilBertEmbeddings from Lokeshsoni2801 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_lokeshsoni2801 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_lokeshsoni2801` is an English model originally trained by Lokeshsoni2801.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lokeshsoni2801_en_5.1.2_3.0_1694770947224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lokeshsoni2801_en_5.1.2_3.0_1694770947224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lokeshsoni2801","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_lokeshsoni2801", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_lokeshsoni2801| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Lokeshsoni2801/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lsimon_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lsimon_en.md new file mode 100644 index 00000000000000..066072d4e9bdae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_lsimon_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_lsimon DistilBertEmbeddings from lsimon +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_lsimon +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_lsimon` is an English model originally trained by lsimon.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lsimon_en_5.1.2_3.0_1694783337770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_lsimon_en_5.1.2_3.0_1694783337770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_lsimon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_lsimon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_lsimon| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lsimon/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_luzimu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_luzimu_en.md new file mode 100644 index 00000000000000..2432be0cfaa61e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_luzimu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_luzimu DistilBertEmbeddings from luzimu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_luzimu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_luzimu` is an English model originally trained by luzimu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_luzimu_en_5.1.2_3.0_1694782954592.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_luzimu_en_5.1.2_3.0_1694782954592.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_luzimu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_luzimu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_luzimu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/luzimu/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_magnustragardh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_magnustragardh_en.md new file mode 100644 index 00000000000000..59d521aa257ae9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_magnustragardh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_magnustragardh DistilBertEmbeddings from magnustragardh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_magnustragardh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_magnustragardh` is an English model originally trained by magnustragardh.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_magnustragardh_en_5.1.2_3.0_1694784349082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_magnustragardh_en_5.1.2_3.0_1694784349082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_magnustragardh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_magnustragardh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_magnustragardh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/magnustragardh/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_manishw_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_manishw_en.md new file mode 100644 index 00000000000000..7adeccce72cddf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_manishw_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_manishw DistilBertEmbeddings from ManishW +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_manishw +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_manishw` is an English model originally trained by ManishW.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_manishw_en_5.1.2_3.0_1694779007105.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_manishw_en_5.1.2_3.0_1694779007105.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_manishw","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_manishw", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_manishw| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ManishW/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_marccram_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_marccram_en.md new file mode 100644 index 00000000000000..359af8942ad663 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_marccram_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_marccram DistilBertEmbeddings from marccram +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_marccram +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_marccram` is an English model originally trained by marccram.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_marccram_en_5.1.2_3.0_1694771311229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_marccram_en_5.1.2_3.0_1694771311229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_marccram","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_marccram", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_marccram| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/marccram/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_martingui_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_martingui_en.md new file mode 100644 index 00000000000000..17cc99ea75ac7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_martingui_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_martingui DistilBertEmbeddings from MartinGui +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_martingui +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_martingui` is an English model originally trained by MartinGui.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_martingui_en_5.1.2_3.0_1694786646812.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_martingui_en_5.1.2_3.0_1694786646812.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_martingui","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_martingui", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_martingui| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MartinGui/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mattiaparavisi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mattiaparavisi_en.md new file mode 100644 index 00000000000000..738ad55426e54b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mattiaparavisi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mattiaparavisi DistilBertEmbeddings from MattiaParavisi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mattiaparavisi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mattiaparavisi` is an English model originally trained by MattiaParavisi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mattiaparavisi_en_5.1.2_3.0_1694782470244.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mattiaparavisi_en_5.1.2_3.0_1694782470244.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mattiaparavisi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mattiaparavisi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mattiaparavisi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/MattiaParavisi/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_maysamalfiza_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_maysamalfiza_en.md new file mode 100644 index 00000000000000..133c958c38779c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_maysamalfiza_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_maysamalfiza DistilBertEmbeddings from maysamalfiza +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_maysamalfiza +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_maysamalfiza` is an English model originally trained by maysamalfiza.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_maysamalfiza_en_5.1.2_3.0_1694789818242.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_maysamalfiza_en_5.1.2_3.0_1694789818242.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_maysamalfiza","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_maysamalfiza", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_maysamalfiza| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/maysamalfiza/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mchalek_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mchalek_en.md new file mode 100644 index 00000000000000..6cd24d86760a96 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mchalek_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mchalek DistilBertEmbeddings from mchalek +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mchalek +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mchalek` is an English model originally trained by mchalek.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mchalek_en_5.1.2_3.0_1694790810503.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mchalek_en_5.1.2_3.0_1694790810503.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mchalek","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mchalek", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mchalek| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mchalek/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mholi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mholi_en.md new file mode 100644 index 00000000000000..0a48e83d439f16 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mholi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mholi DistilBertEmbeddings from mholi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mholi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mholi` is an English model originally trained by mholi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mholi_en_5.1.2_3.0_1694774114982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mholi_en_5.1.2_3.0_1694774114982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mholi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mholi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mholi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mholi/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mildmillard_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mildmillard_en.md new file mode 100644 index 00000000000000..8053e8d6830bb3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mildmillard_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mildmillard DistilBertEmbeddings from mildmillard +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mildmillard +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mildmillard` is an English model originally trained by mildmillard.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mildmillard_en_5.1.2_3.0_1694788468751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mildmillard_en_5.1.2_3.0_1694788468751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mildmillard","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mildmillard", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mildmillard| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mildmillard/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mintz1104_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mintz1104_en.md new file mode 100644 index 00000000000000..ce1a8e679e3743 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mintz1104_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mintz1104 DistilBertEmbeddings from mintz1104 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mintz1104 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mintz1104` is an English model originally trained by mintz1104.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mintz1104_en_5.1.2_3.0_1694783458456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mintz1104_en_5.1.2_3.0_1694783458456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mintz1104","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mintz1104", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mintz1104| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mintz1104/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_minye819_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_minye819_en.md new file mode 100644 index 00000000000000..981398a5275064 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_minye819_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_minye819 DistilBertEmbeddings from minye819 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_minye819 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_minye819` is an English model originally trained by minye819.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_minye819_en_5.1.2_3.0_1694785732543.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_minye819_en_5.1.2_3.0_1694785732543.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_minye819","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_minye819", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_minye819| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/minye819/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mlm_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mlm_accelerate_en.md new file mode 100644 index 00000000000000..0e9588d860230f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mlm_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mlm_accelerate DistilBertEmbeddings from pritam3355 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mlm_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mlm_accelerate` is an English model originally trained by pritam3355.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mlm_accelerate_en_5.1.2_3.0_1694783358290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mlm_accelerate_en_5.1.2_3.0_1694783358290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mlm_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mlm_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mlm_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pritam3355/distilbert-base-uncased-finetuned-imdb-mlm-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mulinski_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mulinski_en.md new file mode 100644 index 00000000000000..a9a4057f89695c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mulinski_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mulinski DistilBertEmbeddings from mulinski +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mulinski +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mulinski` is an English model originally trained by mulinski.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mulinski_en_5.1.2_3.0_1694776177033.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mulinski_en_5.1.2_3.0_1694776177033.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mulinski","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mulinski", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mulinski| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mulinski/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mxalmeida_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mxalmeida_en.md new file mode 100644 index 00000000000000..41c4c1bbd6112e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_mxalmeida_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_mxalmeida DistilBertEmbeddings from mxalmeida +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_mxalmeida +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_mxalmeida` is an English model originally trained by mxalmeida.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mxalmeida_en_5.1.2_3.0_1694783674388.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_mxalmeida_en_5.1.2_3.0_1694783674388.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_mxalmeida","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_mxalmeida", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_mxalmeida| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mxalmeida/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_nugget00_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_nugget00_en.md new file mode 100644 index 00000000000000..4409f03c189a88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_nugget00_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_nugget00 DistilBertEmbeddings from nugget00 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_nugget00 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_nugget00` is an English model originally trained by nugget00.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_nugget00_en_5.1.2_3.0_1694788096126.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_nugget00_en_5.1.2_3.0_1694788096126.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_nugget00","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_nugget00", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_nugget00| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nugget00/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_orangelu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_orangelu_en.md new file mode 100644 index 00000000000000..97c078a6e5b710 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_orangelu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_orangelu DistilBertEmbeddings from orangelu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_orangelu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_orangelu` is an English model originally trained by orangelu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_orangelu_en_5.1.2_3.0_1694786367577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_orangelu_en_5.1.2_3.0_1694786367577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_orangelu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_orangelu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_orangelu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/orangelu/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_parchiev_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_parchiev_en.md new file mode 100644 index 00000000000000..7341b84fd633a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_parchiev_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_parchiev DistilBertEmbeddings from parchiev +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_parchiev +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_parchiev` is an English model originally trained by parchiev.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_parchiev_en_5.1.2_3.0_1694771616239.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_parchiev_en_5.1.2_3.0_1694771616239.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_parchiev","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_parchiev", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_parchiev| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/parchiev/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_pattom_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_pattom_en.md new file mode 100644 index 00000000000000..9fb28b2b1fab35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_pattom_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_pattom DistilBertEmbeddings from pattom +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_pattom +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_pattom` is an English model originally trained by pattom.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pattom_en_5.1.2_3.0_1694785696930.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pattom_en_5.1.2_3.0_1694785696930.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_pattom","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_pattom", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_pattom| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/pattom/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_peterhsu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_peterhsu_en.md new file mode 100644 index 00000000000000..b71c0181d15293 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_peterhsu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_peterhsu DistilBertEmbeddings from peterhsu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_peterhsu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_peterhsu` is an English model originally trained by peterhsu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_peterhsu_en_5.1.2_3.0_1694783528508.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_peterhsu_en_5.1.2_3.0_1694783528508.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_peterhsu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_peterhsu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_peterhsu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peterhsu/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_peteryushunli_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_peteryushunli_en.md new file mode 100644 index 00000000000000..237e7a73f09e5e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_peteryushunli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_peteryushunli DistilBertEmbeddings from peteryushunli +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_peteryushunli +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_peteryushunli` is an English model originally trained by peteryushunli.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_peteryushunli_en_5.1.2_3.0_1694784593034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_peteryushunli_en_5.1.2_3.0_1694784593034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_peteryushunli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_peteryushunli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_peteryushunli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peteryushunli/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_physhunter_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_physhunter_en.md new file mode 100644 index 00000000000000..41adc211b1df3f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_physhunter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_physhunter DistilBertEmbeddings from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_physhunter +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_physhunter` is an English model originally trained by PhysHunter.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_physhunter_en_5.1.2_3.0_1694772969529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_physhunter_en_5.1.2_3.0_1694772969529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_physhunter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_physhunter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_physhunter| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_pierre_arthur_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_pierre_arthur_en.md new file mode 100644 index 00000000000000..7654d1053146fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_pierre_arthur_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_pierre_arthur DistilBertEmbeddings from Pierre-Arthur +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_pierre_arthur +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_pierre_arthur` is an English model originally trained by Pierre-Arthur.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pierre_arthur_en_5.1.2_3.0_1694778085444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_pierre_arthur_en_5.1.2_3.0_1694778085444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_pierre_arthur","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_pierre_arthur", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_pierre_arthur| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Pierre-Arthur/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_poplkl_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_poplkl_en.md new file mode 100644 index 00000000000000..ac1d92179e7b0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_poplkl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_poplkl DistilBertEmbeddings from poplkl +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_poplkl +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_poplkl` is an English model originally trained by poplkl.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_poplkl_en_5.1.2_3.0_1694778090892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_poplkl_en_5.1.2_3.0_1694778090892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_poplkl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_poplkl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_poplkl| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/poplkl/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_prasanthin_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_prasanthin_en.md new file mode 100644 index 00000000000000..6884d0f95bdbcc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_prasanthin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_prasanthin DistilBertEmbeddings from Prasanthin +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_prasanthin +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_prasanthin` is an English model originally trained by Prasanthin.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_prasanthin_en_5.1.2_3.0_1694791836770.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_prasanthin_en_5.1.2_3.0_1694791836770.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_prasanthin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_prasanthin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_prasanthin| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Prasanthin/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_qianyu88_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_qianyu88_en.md new file mode 100644 index 00000000000000..bd4fc552267259 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_qianyu88_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_qianyu88 DistilBertEmbeddings from qianyu88 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_qianyu88 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_qianyu88` is an English model originally trained by qianyu88.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_qianyu88_en_5.1.2_3.0_1694791401990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_qianyu88_en_5.1.2_3.0_1694791401990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_qianyu88","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_qianyu88", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_qianyu88| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/qianyu88/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rajknakka_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rajknakka_en.md new file mode 100644 index 00000000000000..1ba868fc50d4b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rajknakka_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rajknakka DistilBertEmbeddings from RajkNakka +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rajknakka +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_rajknakka` is an English model originally trained by RajkNakka.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rajknakka_en_5.1.2_3.0_1694789607833.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rajknakka_en_5.1.2_3.0_1694789607833.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rajknakka","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_rajknakka", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rajknakka| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RajkNakka/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_raphaelmerx_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_raphaelmerx_en.md new file mode 100644 index 00000000000000..7d8ba517a2bac4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_raphaelmerx_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_raphaelmerx DistilBertEmbeddings from raphaelmerx +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_raphaelmerx +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_raphaelmerx` is an English model originally trained by raphaelmerx.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_raphaelmerx_en_5.1.2_3.0_1694783693258.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_raphaelmerx_en_5.1.2_3.0_1694783693258.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_raphaelmerx","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_raphaelmerx", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_raphaelmerx| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/raphaelmerx/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_raulgdp_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_raulgdp_en.md new file mode 100644 index 00000000000000..78b12fc538a71a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_raulgdp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_raulgdp DistilBertEmbeddings from raulgdp +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_raulgdp +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_raulgdp` is an English model originally trained by raulgdp.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_raulgdp_en_5.1.2_3.0_1694777650610.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_raulgdp_en_5.1.2_3.0_1694777650610.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_raulgdp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_raulgdp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_raulgdp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/raulgdp/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ray2791_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ray2791_en.md new file mode 100644 index 00000000000000..501b64bc880b82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ray2791_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ray2791 DistilBertEmbeddings from Ray2791 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ray2791 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ray2791` is an English model originally trained by Ray2791.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ray2791_en_5.1.2_3.0_1694777725820.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ray2791_en_5.1.2_3.0_1694777725820.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ray2791","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_ray2791", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ray2791| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ray2791/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rayguo2023_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rayguo2023_en.md new file mode 100644 index 00000000000000..73aa24abb3ba54 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rayguo2023_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rayguo2023 DistilBertEmbeddings from RayGuo2023 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rayguo2023 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_rayguo2023` is an English model originally trained by RayGuo2023.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rayguo2023_en_5.1.2_3.0_1694778200046.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rayguo2023_en_5.1.2_3.0_1694778200046.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rayguo2023","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_rayguo2023", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rayguo2023| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RayGuo2023/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rd124_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rd124_en.md new file mode 100644 index 00000000000000..84a7e06d499854 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rd124_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rd124 DistilBertEmbeddings from rd124 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rd124 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_rd124` is an English model originally trained by rd124.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rd124_en_5.1.2_3.0_1694790386344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rd124_en_5.1.2_3.0_1694790386344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rd124","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_rd124", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rd124| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rd124/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rdvdsn_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rdvdsn_en.md new file mode 100644 index 00000000000000..5708a4f1a88796 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rdvdsn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rdvdsn DistilBertEmbeddings from rdvdsn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rdvdsn +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_rdvdsn` is an English model originally trained by rdvdsn.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rdvdsn_en_5.1.2_3.0_1694786547240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rdvdsn_en_5.1.2_3.0_1694786547240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rdvdsn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_rdvdsn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rdvdsn| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rdvdsn/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_renyulin_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_renyulin_en.md new file mode 100644 index 00000000000000..61b80ff542bdc8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_renyulin_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_renyulin DistilBertEmbeddings from renyulin +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_renyulin +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_renyulin` is an English model originally trained by renyulin.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_renyulin_en_5.1.2_3.0_1694773717620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_renyulin_en_5.1.2_3.0_1694773717620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_renyulin","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_renyulin", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_renyulin| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/renyulin/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_replicate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_replicate_en.md new file mode 100644 index 00000000000000..423b4eb65b7753 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_replicate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_replicate DistilBertEmbeddings from hxshen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_replicate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_replicate` is an English model originally trained by hxshen.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_replicate_en_5.1.2_3.0_1694790283986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_replicate_en_5.1.2_3.0_1694790283986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_replicate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_replicate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_replicate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hxshen/distilbert-base-uncased-finetuned-imdb-replicate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_reza93v_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_reza93v_en.md new file mode 100644 index 00000000000000..1f4f6a5d579498 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_reza93v_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_reza93v DistilBertEmbeddings from reza93v +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_reza93v +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_reza93v` is an English model originally trained by reza93v.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_reza93v_en_5.1.2_3.0_1694791487598.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_reza93v_en_5.1.2_3.0_1694791487598.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_reza93v","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_reza93v", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_reza93v| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/reza93v/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_robkayinto_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_robkayinto_en.md new file mode 100644 index 00000000000000..bc850f49e0ee85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_robkayinto_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_robkayinto DistilBertEmbeddings from robkayinto +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_robkayinto +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_robkayinto` is an English model originally trained by robkayinto.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_robkayinto_en_5.1.2_3.0_1694771947206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_robkayinto_en_5.1.2_3.0_1694771947206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_robkayinto","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_robkayinto", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_robkayinto| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/robkayinto/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rugo_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rugo_en.md new file mode 100644 index 00000000000000..23f2a3e442b287 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_rugo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_rugo DistilBertEmbeddings from rugo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_rugo +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_rugo` is an English model originally trained by rugo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rugo_en_5.1.2_3.0_1694783203222.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_rugo_en_5.1.2_3.0_1694783203222.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_rugo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_rugo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_rugo| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rugo/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ryanlai_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ryanlai_en.md new file mode 100644 index 00000000000000..867411875662ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_ryanlai_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_ryanlai DistilBertEmbeddings from ryanlai +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_ryanlai +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_ryanlai` is an English model originally trained by ryanlai.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ryanlai_en_5.1.2_3.0_1694786984703.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_ryanlai_en_5.1.2_3.0_1694786984703.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# `data` must contain the "text" column that DocumentAssembler reads +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# DistilBertEmbeddings reads both "documents" and "token", so tokenize first +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_ryanlai","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) +``` +```scala +// `data` must contain the "text" column that DocumentAssembler reads +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// DistilBertEmbeddings reads both "documents" and "token", so tokenize first +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_ryanlai", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_ryanlai| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ryanlai/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sabby_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sabby_en.md new file mode 100644 index 00000000000000..9ab0de2344b2ec --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sabby_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sabby DistilBertEmbeddings from sabby +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sabby +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sabby` is an English model originally trained by sabby.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sabby_en_5.1.2_3.0_1694770099489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sabby_en_5.1.2_3.0_1694770099489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sabby","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sabby", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sabby| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sabby/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sakaijun_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sakaijun_en.md new file mode 100644 index 00000000000000..878991606f333c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sakaijun_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sakaijun DistilBertEmbeddings from SakaiJun +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sakaijun +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sakaijun` is an English model originally trained by SakaiJun.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sakaijun_en_5.1.2_3.0_1694771049150.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sakaijun_en_5.1.2_3.0_1694771049150.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sakaijun","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sakaijun", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sakaijun| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SakaiJun/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_san94_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_san94_en.md new file mode 100644 index 00000000000000..d11174761785b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_san94_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_san94 DistilBertEmbeddings from san94 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_san94 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_san94` is an English model originally trained by san94.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_san94_en_5.1.2_3.0_1694771416174.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_san94_en_5.1.2_3.0_1694771416174.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_san94","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_san94", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_san94| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/san94/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sarmila_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sarmila_en.md new file mode 100644 index 00000000000000..78661557f06760 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sarmila_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sarmila DistilBertEmbeddings from Sarmila +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sarmila +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sarmila` is an English model originally trained by Sarmila.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sarmila_en_5.1.2_3.0_1694776735479.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sarmila_en_5.1.2_3.0_1694776735479.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sarmila","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sarmila", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sarmila| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sarmila/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sarthakc44_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sarthakc44_en.md new file mode 100644 index 00000000000000..12aa256cd8feb0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sarthakc44_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sarthakc44 DistilBertEmbeddings from sarthakc44 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sarthakc44 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sarthakc44` is an English model originally trained by sarthakc44.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sarthakc44_en_5.1.2_3.0_1694773301034.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sarthakc44_en_5.1.2_3.0_1694773301034.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sarthakc44","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sarthakc44", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sarthakc44| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sarthakc44/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_satyashetty_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_satyashetty_en.md new file mode 100644 index 00000000000000..1ed1a185d38997 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_satyashetty_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_satyashetty DistilBertEmbeddings from satyashetty +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_satyashetty +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_satyashetty` is an English model originally trained by satyashetty.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_satyashetty_en_5.1.2_3.0_1694781764485.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_satyashetty_en_5.1.2_3.0_1694781764485.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_satyashetty","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_satyashetty", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_satyashetty| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/satyashetty/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sertemo_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sertemo_en.md new file mode 100644 index 00000000000000..d0970c40da7edd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sertemo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sertemo DistilBertEmbeddings from sertemo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sertemo +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sertemo` is an English model originally trained by sertemo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sertemo_en_5.1.2_3.0_1694770457266.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sertemo_en_5.1.2_3.0_1694770457266.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sertemo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sertemo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sertemo| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sertemo/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sgasparorippa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sgasparorippa_en.md new file mode 100644 index 00000000000000..2ecb6be829de77 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sgasparorippa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sgasparorippa DistilBertEmbeddings from sgasparorippa +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sgasparorippa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sgasparorippa` is an English model originally trained by sgasparorippa.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sgasparorippa_en_5.1.2_3.0_1694776057551.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sgasparorippa_en_5.1.2_3.0_1694776057551.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sgasparorippa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sgasparorippa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sgasparorippa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sgasparorippa/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shahriarebrampour_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shahriarebrampour_en.md new file mode 100644 index 00000000000000..3e3884debc4b28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shahriarebrampour_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_shahriarebrampour DistilBertEmbeddings from shahriarebrampour +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_shahriarebrampour +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_shahriarebrampour` is an English model originally trained by shahriarebrampour.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shahriarebrampour_en_5.1.2_3.0_1694788371168.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shahriarebrampour_en_5.1.2_3.0_1694788371168.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_shahriarebrampour","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_shahriarebrampour", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_shahriarebrampour| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shahriarebrampour/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shre_db_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shre_db_en.md new file mode 100644 index 00000000000000..10467160eef45a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shre_db_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_shre_db DistilBertEmbeddings from shre-db +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_shre_db +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_shre_db` is an English model originally trained by shre-db.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shre_db_en_5.1.2_3.0_1694782000633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shre_db_en_5.1.2_3.0_1694782000633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_shre_db","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_shre_db", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_shre_db| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shre-db/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shreyasdatar_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shreyasdatar_en.md new file mode 100644 index 00000000000000..9460dd524f637f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_shreyasdatar_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_shreyasdatar DistilBertEmbeddings from shreyasdatar +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_shreyasdatar +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_shreyasdatar` is an English model originally trained by shreyasdatar.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shreyasdatar_en_5.1.2_3.0_1694776445411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_shreyasdatar_en_5.1.2_3.0_1694776445411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +# Turn the raw "text" column into document annotations. +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_shreyasdatar","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +// Turn the raw "text" column into document annotations. +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The embeddings stage lists "token" as an input column, so a Tokenizer stage must produce it. +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_shreyasdatar", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_shreyasdatar| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shreyasdatar/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_smarquie_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_smarquie_en.md new file mode 100644 index 00000000000000..ed761d1542d162 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_smarquie_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_smarquie DistilBertEmbeddings from smarquie +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_smarquie +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_smarquie` is an English model originally trained by smarquie.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_smarquie_en_5.1.2_3.0_1694779920112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_smarquie_en_5.1.2_3.0_1694779920112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_smarquie","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_smarquie", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_smarquie| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/smarquie/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_snousias_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_snousias_en.md new file mode 100644 index 00000000000000..ecfa350dccffc4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_snousias_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_snousias DistilBertEmbeddings from snousias +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_snousias +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_snousias` is an English model originally trained by snousias.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_snousias_en_5.1.2_3.0_1694771960892.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_snousias_en_5.1.2_3.0_1694771960892.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_snousias","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_snousias", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_snousias| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/snousias/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sofa566_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sofa566_en.md new file mode 100644 index 00000000000000..02b2390b968550 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sofa566_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sofa566 DistilBertEmbeddings from sofa566 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sofa566 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sofa566` is an English model originally trained by sofa566.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sofa566_en_5.1.2_3.0_1694780390044.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sofa566_en_5.1.2_3.0_1694780390044.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sofa566","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sofa566", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sofa566| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sofa566/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_solver_paul_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_solver_paul_en.md new file mode 100644 index 00000000000000..cd7424bfa94d9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_solver_paul_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_solver_paul DistilBertEmbeddings from solver-paul +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_solver_paul +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_solver_paul` is an English model originally trained by solver-paul.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_solver_paul_en_5.1.2_3.0_1694781878267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_solver_paul_en_5.1.2_3.0_1694781878267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_solver_paul","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_solver_paul", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_solver_paul| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/solver-paul/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sonali_behera_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sonali_behera_en.md new file mode 100644 index 00000000000000..80d33229edbdb7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sonali_behera_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sonali_behera DistilBertEmbeddings from Sonali-Behera +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sonali_behera +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sonali_behera` is an English model originally trained by Sonali-Behera.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sonali_behera_en_5.1.2_3.0_1694777089415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sonali_behera_en_5.1.2_3.0_1694777089415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sonali_behera","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sonali_behera", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sonali_behera| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sonali-Behera/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sumedha_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sumedha_en.md new file mode 100644 index 00000000000000..5dc9c50f7f2f47 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_sumedha_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_sumedha DistilBertEmbeddings from Sumedha +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_sumedha +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_sumedha` is an English model originally trained by Sumedha.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sumedha_en_5.1.2_3.0_1694772327267.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_sumedha_en_5.1.2_3.0_1694772327267.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_sumedha","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_sumedha", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_sumedha| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sumedha/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_supersokol_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_supersokol_en.md new file mode 100644 index 00000000000000..91f40b986bcd2e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_supersokol_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_supersokol DistilBertEmbeddings from SUPERSOKOL +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_supersokol +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_supersokol` is an English model originally trained by SUPERSOKOL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_supersokol_en_5.1.2_3.0_1694785118271.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_supersokol_en_5.1.2_3.0_1694785118271.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_supersokol","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_supersokol", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_supersokol| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SUPERSOKOL/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_surjray_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_surjray_en.md new file mode 100644 index 00000000000000..f642086c54e41c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_surjray_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_surjray DistilBertEmbeddings from surjray +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_surjray +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_surjray` is an English model originally trained by surjray.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_surjray_en_5.1.2_3.0_1694778304075.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_surjray_en_5.1.2_3.0_1694778304075.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_surjray","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_surjray", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_surjray| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/surjray/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_susghosh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_susghosh_en.md new file mode 100644 index 00000000000000..7d408a50f8418e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_susghosh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_susghosh DistilBertEmbeddings from susghosh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_susghosh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_susghosh` is an English model originally trained by susghosh.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_susghosh_en_5.1.2_3.0_1694781874762.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_susghosh_en_5.1.2_3.0_1694781874762.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_susghosh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_susghosh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_susghosh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/susghosh/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_talha185_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_talha185_en.md new file mode 100644 index 00000000000000..bc36425da7d080 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_talha185_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_talha185 DistilBertEmbeddings from Talha185 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_talha185 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_talha185` is an English model originally trained by Talha185.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_talha185_en_5.1.2_3.0_1694772261015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_talha185_en_5.1.2_3.0_1694772261015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_talha185","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_talha185", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_talha185| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Talha185/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_techtank_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_techtank_en.md new file mode 100644 index 00000000000000..cae38c16a10a9b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_techtank_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_techtank DistilBertEmbeddings from techtank +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_techtank +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_techtank` is an English model originally trained by techtank.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_techtank_en_5.1.2_3.0_1694785949232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_techtank_en_5.1.2_3.0_1694785949232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_techtank","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_techtank", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_techtank| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/techtank/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_terps_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_terps_en.md new file mode 100644 index 00000000000000..6e26c7c64c3dda --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_terps_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_terps DistilBertEmbeddings from Terps +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_terps +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_terps` is an English model originally trained by Terps.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_terps_en_5.1.2_3.0_1694790485061.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_terps_en_5.1.2_3.0_1694790485061.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_terps","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_terps", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_terps| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Terps/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thangvip_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thangvip_en.md new file mode 100644 index 00000000000000..ac1647a221a16f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thangvip_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_thangvip DistilBertEmbeddings from thangvip +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_thangvip +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_thangvip` is an English model originally trained by thangvip.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thangvip_en_5.1.2_3.0_1694791230390.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thangvip_en_5.1.2_3.0_1694791230390.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_thangvip","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_thangvip", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_thangvip| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/thangvip/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thaophung_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thaophung_en.md new file mode 100644 index 00000000000000..73c55c4da2c560 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thaophung_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_thaophung DistilBertEmbeddings from thaophung +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_thaophung +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_thaophung` is an English model originally trained by thaophung.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thaophung_en_5.1.2_3.0_1694772963814.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thaophung_en_5.1.2_3.0_1694772963814.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_thaophung","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_thaophung", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_thaophung| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/thaophung/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thetaphipsi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thetaphipsi_en.md new file mode 100644 index 00000000000000..54a2bea240c373 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thetaphipsi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_thetaphipsi DistilBertEmbeddings from ThetaPhiPsi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_thetaphipsi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_thetaphipsi` is an English model originally trained by ThetaPhiPsi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thetaphipsi_en_5.1.2_3.0_1694781139251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thetaphipsi_en_5.1.2_3.0_1694781139251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_thetaphipsi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_thetaphipsi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_thetaphipsi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ThetaPhiPsi/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_threite_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_threite_en.md new file mode 100644 index 00000000000000..7c340ddd3dd6b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_threite_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_threite DistilBertEmbeddings from threite +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_threite +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_threite` is an English model originally trained by threite.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_threite_en_5.1.2_3.0_1694788185263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_threite_en_5.1.2_3.0_1694788185263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_threite","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_threite", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_threite| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/threite/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thutrang_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thutrang_en.md new file mode 100644 index 00000000000000..21b9dc45959db0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_thutrang_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_thutrang DistilBertEmbeddings from ThuTrang +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_thutrang +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_thutrang` is an English model originally trained by ThuTrang.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thutrang_en_5.1.2_3.0_1694789894015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_thutrang_en_5.1.2_3.0_1694789894015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_thutrang","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_thutrang", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_thutrang| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ThuTrang/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_timtl_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_timtl_en.md new file mode 100644 index 00000000000000..c35ee439580c6e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_timtl_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_timtl DistilBertEmbeddings from TimTL +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_timtl +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_timtl` is an English model originally trained by TimTL.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_timtl_en_5.1.2_3.0_1694784349104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_timtl_en_5.1.2_3.0_1694784349104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_timtl","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_timtl", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_timtl| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/TimTL/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tkoyama_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tkoyama_en.md new file mode 100644 index 00000000000000..d45409b3924e4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tkoyama_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_tkoyama DistilBertEmbeddings from tkoyama +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_tkoyama +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_tkoyama` is an English model originally trained by tkoyama.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tkoyama_en_5.1.2_3.0_1694788619647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tkoyama_en_5.1.2_3.0_1694788619647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_tkoyama","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_tkoyama", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_tkoyama| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tkoyama/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tlapusan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tlapusan_en.md new file mode 100644 index 00000000000000..e000090cfebd8a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tlapusan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_tlapusan DistilBertEmbeddings from tlapusan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_tlapusan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_tlapusan` is an English model originally trained by tlapusan.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tlapusan_en_5.1.2_3.0_1694789306473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tlapusan_en_5.1.2_3.0_1694789306473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_tlapusan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_tlapusan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_tlapusan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tlapusan/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tofunumber1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tofunumber1_en.md new file mode 100644 index 00000000000000..352b638c92fb6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tofunumber1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_tofunumber1 DistilBertEmbeddings from TofuNumber1 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_tofunumber1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_tofunumber1` is an English model originally trained by TofuNumber1.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tofunumber1_en_5.1.2_3.0_1694776635641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tofunumber1_en_5.1.2_3.0_1694776635641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_tofunumber1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_tofunumber1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_tofunumber1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/TofuNumber1/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tsahhi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tsahhi_en.md new file mode 100644 index 00000000000000..38674573a4d5a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tsahhi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_tsahhi DistilBertEmbeddings from Tsahhi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_tsahhi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_tsahhi` is an English model originally trained by Tsahhi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tsahhi_en_5.1.2_3.0_1694788650908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tsahhi_en_5.1.2_3.0_1694788650908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_tsahhi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_tsahhi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_tsahhi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Tsahhi/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tux_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tux_en.md new file mode 100644 index 00000000000000..23da54dbc779d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tux_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_tux DistilBertEmbeddings from tux +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_tux +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_tux` is an English model originally trained by tux. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tux_en_5.1.2_3.0_1694786698878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tux_en_5.1.2_3.0_1694786698878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_tux","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_tux", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_tux| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tux/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tyson0420_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tyson0420_en.md new file mode 100644 index 00000000000000..cc0f26202bbe37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_tyson0420_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_tyson0420 DistilBertEmbeddings from tyson0420 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_tyson0420 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_tyson0420` is an English model originally trained by tyson0420. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tyson0420_en_5.1.2_3.0_1694775228117.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_tyson0420_en_5.1.2_3.0_1694775228117.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_tyson0420","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_tyson0420", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_tyson0420| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/tyson0420/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_udoy_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_udoy_en.md new file mode 100644 index 00000000000000..e61ee669c6ab02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_udoy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_udoy DistilBertEmbeddings from Udoy +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_udoy +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_udoy` is an English model originally trained by Udoy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_udoy_en_5.1.2_3.0_1694786003499.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_udoy_en_5.1.2_3.0_1694786003499.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_udoy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_udoy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_udoy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Udoy/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_accelerate_en.md new file mode 100644 index 00000000000000..af4706e8fbe3ee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_v2_accelerate DistilBertEmbeddings from kaiku03 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_v2_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_v2_accelerate` is an English model originally trained by kaiku03. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_v2_accelerate_en_5.1.2_3.0_1694789614333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_v2_accelerate_en_5.1.2_3.0_1694789614333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_v2_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_v2_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_v2_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kaiku03/distilbert-base-uncased-finetuned-imdb_v2_accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_francesco_a_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_francesco_a_en.md new file mode 100644 index 00000000000000..21216c3d68f8ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_francesco_a_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_v2_francesco_a DistilBertEmbeddings from Francesco-A +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_v2_francesco_a +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_v2_francesco_a` is an English model originally trained by Francesco-A. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_v2_francesco_a_en_5.1.2_3.0_1694783237772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_v2_francesco_a_en_5.1.2_3.0_1694783237772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_v2_francesco_a","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_v2_francesco_a", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_v2_francesco_a| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Francesco-A/distilbert-base-uncased-finetuned-imdb-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_rd124_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_rd124_en.md new file mode 100644 index 00000000000000..9bbe40429ed6ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_v2_rd124_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_v2_rd124 DistilBertEmbeddings from rd124 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_v2_rd124 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_v2_rd124` is an English model originally trained by rd124. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_v2_rd124_en_5.1.2_3.0_1694790490326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_v2_rd124_en_5.1.2_3.0_1694790490326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_v2_rd124","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_v2_rd124", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_v2_rd124| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/rd124/distilbert-base-uncased-finetuned-imdb-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vanhoan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vanhoan_en.md new file mode 100644 index 00000000000000..d0beec3e4579d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vanhoan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_vanhoan DistilBertEmbeddings from VanHoan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_vanhoan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_vanhoan` is an English model originally trained by VanHoan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vanhoan_en_5.1.2_3.0_1694783996380.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vanhoan_en_5.1.2_3.0_1694783996380.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_vanhoan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_vanhoan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_vanhoan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/VanHoan/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_venkyz9_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_venkyz9_en.md new file mode 100644 index 00000000000000..a18874b7686d58 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_venkyz9_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_venkyz9 DistilBertEmbeddings from venkyz9 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_venkyz9 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_venkyz9` is an English model originally trained by venkyz9. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_venkyz9_en_5.1.2_3.0_1694777457894.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_venkyz9_en_5.1.2_3.0_1694777457894.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_venkyz9","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_venkyz9", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_venkyz9| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/venkyz9/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vibharkchauhan_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vibharkchauhan_en.md new file mode 100644 index 00000000000000..5d10c910439a5f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vibharkchauhan_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_vibharkchauhan DistilBertEmbeddings from Vibharkchauhan +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_vibharkchauhan +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_vibharkchauhan` is an English model originally trained by Vibharkchauhan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vibharkchauhan_en_5.1.2_3.0_1694770670699.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vibharkchauhan_en_5.1.2_3.0_1694770670699.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_vibharkchauhan","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_vibharkchauhan", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_vibharkchauhan| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Vibharkchauhan/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vives_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vives_en.md new file mode 100644 index 00000000000000..0b575aff66f065 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vives_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_vives DistilBertEmbeddings from vives +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_vives +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_vives` is an English model originally trained by vives. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vives_en_5.1.2_3.0_1694772503575.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vives_en_5.1.2_3.0_1694772503575.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_vives","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_vives", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_vives| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vives/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vsrinivas_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vsrinivas_en.md new file mode 100644 index 00000000000000..0429da59fdcdfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_vsrinivas_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_vsrinivas DistilBertEmbeddings from vsrinivas +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_vsrinivas +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_vsrinivas` is an English model originally trained by vsrinivas. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vsrinivas_en_5.1.2_3.0_1694787983867.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_vsrinivas_en_5.1.2_3.0_1694787983867.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_vsrinivas","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_vsrinivas", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_vsrinivas| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vsrinivas/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_whole_word_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_whole_word_en.md new file mode 100644 index 00000000000000..6998c4d2df35c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_whole_word_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_whole_word DistilBertEmbeddings from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_whole_word +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_whole_word` is an English model originally trained by PhysHunter. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_whole_word_en_5.1.2_3.0_1694773100332.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_whole_word_en_5.1.2_3.0_1694773100332.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_whole_word","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_whole_word", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_whole_word| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-imdb-whole-word \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_wjbmattingly_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_wjbmattingly_en.md new file mode 100644 index 00000000000000..932dfdf7b1c789 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_wjbmattingly_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_wjbmattingly DistilBertEmbeddings from wjbmattingly +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_wjbmattingly +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_wjbmattingly` is an English model originally trained by wjbmattingly.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_wjbmattingly_en_5.1.2_3.0_1694786512902.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_wjbmattingly_en_5.1.2_3.0_1694786512902.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_wjbmattingly","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_wjbmattingly", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_wjbmattingly| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/wjbmattingly/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_y_haneji_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_y_haneji_en.md new file mode 100644 index 00000000000000..765c2547379625 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_y_haneji_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_y_haneji DistilBertEmbeddings from Y-Haneji +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_y_haneji +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_y_haneji` is an English model originally trained by Y-Haneji.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_y_haneji_en_5.1.2_3.0_1694779145507.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_y_haneji_en_5.1.2_3.0_1694779145507.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_y_haneji","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_y_haneji", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_y_haneji| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Y-Haneji/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_yangwooko_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_yangwooko_en.md new file mode 100644 index 00000000000000..4ff6d48920795c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_yangwooko_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_yangwooko DistilBertEmbeddings from yangwooko +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_yangwooko +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_yangwooko` is an English model originally trained by yangwooko.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_yangwooko_en_5.1.2_3.0_1694776915854.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_yangwooko_en_5.1.2_3.0_1694776915854.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_yangwooko","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_yangwooko", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_yangwooko| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/yangwooko/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_yuto01_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_yuto01_en.md new file mode 100644 index 00000000000000..54c7b28979c107 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_imdb_yuto01_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_imdb_yuto01 DistilBertEmbeddings from Yuto01 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_imdb_yuto01 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_imdb_yuto01` is an English model originally trained by Yuto01.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_yuto01_en_5.1.2_3.0_1694789487678.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_imdb_yuto01_en_5.1.2_3.0_1694789487678.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_imdb_yuto01","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_imdb_yuto01", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_imdb_yuto01| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Yuto01/distilbert-base-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ivr_finetuned_ivr_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ivr_finetuned_ivr_en.md new file mode 100644 index 00000000000000..5bb26cad9437d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_ivr_finetuned_ivr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_ivr_finetuned_ivr DistilBertEmbeddings from khubaib +author: John Snow Labs +name: distilbert_base_uncased_finetuned_ivr_finetuned_ivr +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_ivr_finetuned_ivr` is an English model originally trained by khubaib.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ivr_finetuned_ivr_en_5.1.2_3.0_1694776652350.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_ivr_finetuned_ivr_en_5.1.2_3.0_1694776652350.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_ivr_finetuned_ivr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_ivr_finetuned_ivr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_ivr_finetuned_ivr| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/khubaib/distilbert-base-uncased-finetuned-ivr-finetuned-ivr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_kintweetse_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_kintweetse_en.md new file mode 100644 index 00000000000000..28f11009faeec0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_kintweetse_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_kintweetse DistilBertEmbeddings from RogerB +author: John Snow Labs +name: distilbert_base_uncased_finetuned_kintweetse +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_kintweetse` is an English model originally trained by RogerB.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_kintweetse_en_5.1.2_3.0_1694773642871.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_kintweetse_en_5.1.2_3.0_1694773642871.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_kintweetse","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_kintweetse", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_kintweetse| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RogerB/distilbert-base-uncased-finetuned-kintweetsE \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_mlm_1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_mlm_1_en.md new file mode 100644 index 00000000000000..c9f7956e876fab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_mlm_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_mlm_1 DistilBertEmbeddings from aarroonn22 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_mlm_1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_mlm_1` is an English model originally trained by aarroonn22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_1_en_5.1.2_3.0_1694772083071.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_1_en_5.1.2_3.0_1694772083071.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_mlm_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_mlm_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_mlm_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/aarroonn22/distilbert-base-uncased-finetuned-mlm-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_mlm_2_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_mlm_2_en.md new file mode 100644 index 00000000000000..0b03bbfd1628c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_mlm_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_mlm_2 DistilBertEmbeddings from aarroonn22 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_mlm_2 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_mlm_2` is an English model originally trained by aarroonn22. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_2_en_5.1.2_3.0_1694770824986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_mlm_2_en_5.1.2_3.0_1694770824986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_mlm_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_mlm_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_mlm_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/aarroonn22/distilbert-base-uncased-finetuned-mlm-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_nitro_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_nitro_en.md new file mode 100644 index 00000000000000..e0494c826755ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_nitro_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_nitro DistilBertEmbeddings from dieexbr +author: John Snow Labs +name: distilbert_base_uncased_finetuned_nitro +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_nitro` is an English model originally trained by dieexbr. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nitro_en_5.1.2_3.0_1694771823890.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_nitro_en_5.1.2_3.0_1694771823890.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_nitro","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_nitro", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_nitro| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dieexbr/distilbert-base-uncased-finetuned-nitro \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outoh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outoh_en.md new file mode 100644 index 00000000000000..2f0bf97fda503e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outoh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_outoh DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_outoh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_outoh` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outoh_en_5.1.2_3.0_1694779309949.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outoh_en_5.1.2_3.0_1694779309949.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_outoh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_outoh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_outoh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-outoH \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_en.md new file mode 100644 index 00000000000000..d85783ca467698 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_outop DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_outop +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_outop` is an English model originally trained by himanimaheshwari3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outop_en_5.1.2_3.0_1694779414180.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outop_en_5.1.2_3.0_1694779414180.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_outop","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_outop", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_outop| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-outop \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_j_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_j_en.md new file mode 100644 index 00000000000000..0a6350b9159fc5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_j_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_outop_j DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_outop_j +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_outop_j` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outop_j_en_5.1.2_3.0_1694779643765.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outop_j_en_5.1.2_3.0_1694779643765.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_outop_j","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_outop_j", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_outop_j| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-outop-J \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_y_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_y_en.md new file mode 100644 index 00000000000000..7ac829c6411954 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_outop_y_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_outop_y DistilBertEmbeddings from himanimaheshwari3 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_outop_y +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_outop_y` is an English model originally trained by himanimaheshwari3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outop_y_en_5.1.2_3.0_1694779519036.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_outop_y_en_5.1.2_3.0_1694779519036.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_outop_y","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_outop_y", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_outop_y| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/himanimaheshwari3/distilbert-base-uncased-finetuned-outop-y \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_preprint_full_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_preprint_full_en.md new file mode 100644 index 00000000000000..537f30f2a256de --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_preprint_full_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_preprint_full DistilBertEmbeddings from vamads +author: John Snow Labs +name: distilbert_base_uncased_finetuned_preprint_full +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_preprint_full` is an English model originally trained by vamads. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_preprint_full_en_5.1.2_3.0_1694780135347.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_preprint_full_en_5.1.2_3.0_1694780135347.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_preprint_full","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_preprint_full", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_preprint_full| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/vamads/distilbert-base-uncased-finetuned-preprint_full \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_provenances_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_provenances_en.md new file mode 100644 index 00000000000000..96df623755d907 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_provenances_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_provenances DistilBertEmbeddings from RiccardoGvn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_provenances +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_provenances` is an English model originally trained by RiccardoGvn. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_provenances_en_5.1.2_3.0_1694781872135.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_provenances_en_5.1.2_3.0_1694781872135.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_provenances","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_provenances", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_provenances| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RiccardoGvn/distilbert-base-uncased-finetuned-provenances \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_provenances_finetuned_provenances_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_provenances_finetuned_provenances_en.md new file mode 100644 index 00000000000000..2e0bc5c4ca3205 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_provenances_finetuned_provenances_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_provenances_finetuned_provenances DistilBertEmbeddings from RiccardoGvn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_provenances_finetuned_provenances +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_provenances_finetuned_provenances` is an English model originally trained by RiccardoGvn. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_provenances_finetuned_provenances_en_5.1.2_3.0_1694781997921.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_provenances_finetuned_provenances_en_5.1.2_3.0_1694781997921.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_provenances_finetuned_provenances","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_provenances_finetuned_provenances", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_provenances_finetuned_provenances| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RiccardoGvn/distilbert-base-uncased-finetuned-provenances-finetuned-provenances \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_rap_lyrics_v1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_rap_lyrics_v1_en.md new file mode 100644 index 00000000000000..a6db78786dcad1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_rap_lyrics_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_rap_lyrics_v1 DistilBertEmbeddings from peteryushunli +author: John Snow Labs +name: distilbert_base_uncased_finetuned_rap_lyrics_v1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_rap_lyrics_v1` is an English model originally trained by peteryushunli. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_rap_lyrics_v1_en_5.1.2_3.0_1694788210807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_rap_lyrics_v1_en_5.1.2_3.0_1694788210807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_rap_lyrics_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_rap_lyrics_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_rap_lyrics_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peteryushunli/distilbert-base-uncased-finetuned-rap-lyrics-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_recipe_accelerate_1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_recipe_accelerate_1_en.md new file mode 100644 index 00000000000000..2c0e27bcab0f19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_recipe_accelerate_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_recipe_accelerate_1 DistilBertEmbeddings from CennetOguz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_recipe_accelerate_1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_recipe_accelerate_1` is an English model originally trained by CennetOguz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_1_en_5.1.2_3.0_1694770672733.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_1_en_5.1.2_3.0_1694770672733.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_recipe_accelerate_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_recipe_accelerate_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_recipe_accelerate_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/CennetOguz/distilbert-base-uncased-finetuned-recipe-accelerate-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_recipe_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_recipe_accelerate_en.md new file mode 100644 index 00000000000000..d64ec039e6bc80 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_recipe_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_recipe_accelerate DistilBertEmbeddings from CennetOguz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_recipe_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_recipe_accelerate` is an English model originally trained by CennetOguz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_en_5.1.2_3.0_1694770774708.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_recipe_accelerate_en_5.1.2_3.0_1694770774708.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_recipe_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_recipe_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_recipe_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/CennetOguz/distilbert-base-uncased-finetuned-recipe-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_speeches_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_speeches_en.md new file mode 100644 index 00000000000000..01ced0e38ca9fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_speeches_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_speeches DistilBertEmbeddings from peterday +author: John Snow Labs +name: distilbert_base_uncased_finetuned_speeches +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_speeches` is an English model originally trained by peterday. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_speeches_en_5.1.2_3.0_1694770816356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_speeches_en_5.1.2_3.0_1694770816356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_speeches","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_speeches", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_speeches| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peterday/distilbert-base-uncased-finetuned-speeches \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md new file mode 100644 index 00000000000000..c76388f0a9c3c8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz DistilBertEmbeddings from BatuhanYilmaz +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz` is an English model originally trained by BatuhanYilmaz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en_5.1.2_3.0_1694770351064.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz_en_5.1.2_3.0_1694770351064.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_batuhanyilmaz| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/BatuhanYilmaz/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg_en.md new file mode 100644 index 00000000000000..e50ed6892b1b27 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg DistilBertEmbeddings from bellawanggg +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg` is an English model originally trained by bellawanggg. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg_en_5.1.2_3.0_1694771192012.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg_en_5.1.2_3.0_1694771192012.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_bellawanggg| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/bellawanggg/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db_en.md new file mode 100644 index 00000000000000..19254d4efb38ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db DistilBertEmbeddings from coreyabs-db +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db` is an English model originally trained by coreyabs-db.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db_en_5.1.2_3.0_1694781671387.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db_en_5.1.2_3.0_1694781671387.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_coreyabs_db| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/coreyabs-db/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_en.md new file mode 100644 index 00000000000000..3e0cc2060164b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dchung117 DistilBertEmbeddings from dchung117 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dchung117 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_dchung117` is an English model originally trained by dchung117.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_en_5.1.2_3.0_1694788118641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dchung117_en_5.1.2_3.0_1694788118641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dchung117","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dchung117", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dchung117| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dchung117/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md new file mode 100644 index 00000000000000..77e28d5e954cc1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_dkimds DistilBertEmbeddings from dkimds +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_dkimds +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_dkimds` is an English model originally trained by dkimds.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en_5.1.2_3.0_1694786923235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_dkimds_en_5.1.2_3.0_1694786923235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dkimds","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_dkimds", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_dkimds| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dkimds/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi_en.md new file mode 100644 index 00000000000000..27ef6d217bbcd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi DistilBertEmbeddings from fadliaulawi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi` is an English model originally trained by fadliaulawi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi_en_5.1.2_3.0_1694777976444.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi_en_5.1.2_3.0_1694777976444.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_fadliaulawi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/fadliaulawi/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md new file mode 100644 index 00000000000000..f2312e27b7f8e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989 DistilBertEmbeddings from gautam1989 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989` is an English model originally trained by gautam1989.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en_5.1.2_3.0_1694773530136.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989_en_5.1.2_3.0_1694773530136.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_gautam1989| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gautam1989/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_gostrive_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_gostrive_en.md new file mode 100644 index 00000000000000..75b429d50c86ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_gostrive_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_gostrive DistilBertEmbeddings from gostrive +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_gostrive +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_gostrive` is an English model originally trained by gostrive.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gostrive_en_5.1.2_3.0_1694782689611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_gostrive_en_5.1.2_3.0_1694782689611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_gostrive","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_gostrive", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_gostrive| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/gostrive/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_guoguo_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_guoguo_en.md new file mode 100644 index 00000000000000..5c932c1ae45321 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_guoguo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_guoguo DistilBertEmbeddings from guoguo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_guoguo +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_guoguo` is an English model originally trained by guoguo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_guoguo_en_5.1.2_3.0_1694784353246.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_guoguo_en_5.1.2_3.0_1694784353246.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_guoguo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_guoguo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_guoguo| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/guoguo/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr_en.md new file mode 100644 index 00000000000000..6d0b52804d9bf6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr DistilBertEmbeddings from iotengtr +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr` is an English model originally trained by iotengtr.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr_en_5.1.2_3.0_1694774783675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr_en_5.1.2_3.0_1694774783675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_iotengtr| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/iotengtr/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md new file mode 100644 index 00000000000000..4707af5be751e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81 DistilBertEmbeddings from juancopi81 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81` is an English model originally trained by juancopi81.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en_5.1.2_3.0_1694779999978.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81_en_5.1.2_3.0_1694779999978.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_juancopi81| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/juancopi81/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md new file mode 100644 index 00000000000000..ca6d920b37cc4f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea DistilBertEmbeddings from jwlovetea +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea` is an English model originally trained by jwlovetea.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en_5.1.2_3.0_1694790258197.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea_en_5.1.2_3.0_1694790258197.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_jwlovetea| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jwlovetea/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn_en.md new file mode 100644 index 00000000000000..b799b235ed2df7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn DistilBertEmbeddings from lakecrimsonn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn` is an English model originally trained by lakecrimsonn.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn_en_5.1.2_3.0_1694775410500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn_en_5.1.2_3.0_1694775410500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_lakecrimsonn| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lakecrimsonn/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_luzimu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_luzimu_en.md new file mode 100644 index 00000000000000..201520baae29e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_luzimu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_luzimu DistilBertEmbeddings from luzimu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_luzimu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_luzimu` is an English model originally trained by luzimu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_luzimu_en_5.1.2_3.0_1694783536406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_luzimu_en_5.1.2_3.0_1694783536406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_luzimu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_luzimu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_luzimu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/luzimu/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md new file mode 100644 index 00000000000000..6aaf7db41b68e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_maseiya DistilBertEmbeddings from maseiya +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_maseiya +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_maseiya` is an English model originally trained by maseiya.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en_5.1.2_3.0_1694770328657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_maseiya_en_5.1.2_3.0_1694770328657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_maseiya","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_maseiya", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_maseiya| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/maseiya/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_mbateman_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_mbateman_en.md new file mode 100644 index 00000000000000..e117378fd6db5b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_mbateman_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_mbateman DistilBertEmbeddings from mbateman +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_mbateman +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_mbateman` is an English model originally trained by mbateman.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_mbateman_en_5.1.2_3.0_1694782120527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_mbateman_en_5.1.2_3.0_1694782120527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_mbateman","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_mbateman", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_mbateman| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/mbateman/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md new file mode 100644 index 00000000000000..c947fab2d8e8ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob DistilBertEmbeddings from miesnerjacob +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob` is an English model originally trained by miesnerjacob.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en_5.1.2_3.0_1694774420966.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob_en_5.1.2_3.0_1694774420966.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_miesnerjacob| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/miesnerjacob/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi_en.md new file mode 100644 index 00000000000000..925e071a5e5068 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi DistilBertEmbeddings from nicolacandussi +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi` is an English model originally trained by nicolacandussi.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi_en_5.1.2_3.0_1694792078563.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi_en_5.1.2_3.0_1694792078563.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_nicolacandussi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nicolacandussi/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_nugget00_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_nugget00_en.md new file mode 100644 index 00000000000000..61660c170d082c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_nugget00_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_nugget00 DistilBertEmbeddings from nugget00 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_nugget00 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_nugget00` is an English model originally trained by nugget00.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_nugget00_en_5.1.2_3.0_1694788315308.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_nugget00_en_5.1.2_3.0_1694788315308.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_nugget00","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_nugget00", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_nugget00| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nugget00/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md new file mode 100644 index 00000000000000..32d5a19e816465 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero DistilBertEmbeddings from osanseviero +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero` is an English model originally trained by osanseviero.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en_5.1.2_3.0_1694787328705.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero_en_5.1.2_3.0_1694787328705.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_osanseviero| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/osanseviero/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu_en.md new file mode 100644 index 00000000000000..93128e31c9cbaf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu DistilBertEmbeddings from peterhsu +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu` is an English model originally trained by peterhsu.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu_en_5.1.2_3.0_1694787941415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu_en_5.1.2_3.0_1694787941415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_peterhsu| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/peterhsu/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md new file mode 100644 index 00000000000000..a5dd3aaecc4b28 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_physhunter DistilBertEmbeddings from PhysHunter +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_physhunter +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_physhunter` is an English model originally trained by PhysHunter.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en_5.1.2_3.0_1694776775566.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_physhunter_en_5.1.2_3.0_1694776775566.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_physhunter","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_physhunter", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_physhunter| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/PhysHunter/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake_en.md new file mode 100644 index 00000000000000..6124c05c388d9f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake DistilBertEmbeddings from runningsnake +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake` is an English model originally trained by runningsnake.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake_en_5.1.2_3.0_1694782116286.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake_en_5.1.2_3.0_1694782116286.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_runningsnake| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/runningsnake/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo_en.md new file mode 100644 index 00000000000000..a26a62436e8e37 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo DistilBertEmbeddings from SayaEndo +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo` is an English model originally trained by SayaEndo.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo_en_5.1.2_3.0_1694781746406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo_en_5.1.2_3.0_1694781746406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sayaendo| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SayaEndo/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md new file mode 100644 index 00000000000000..7880a9faf7bb14 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sebastians DistilBertEmbeddings from SebastianS +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sebastians +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_sebastians` is an English model originally trained by SebastianS.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en_5.1.2_3.0_1694772917775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sebastians_en_5.1.2_3.0_1694772917775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sebastians","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sebastians", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sebastians| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SebastianS/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sgr23_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sgr23_en.md new file mode 100644 index 00000000000000..efbac5bdd51fad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sgr23_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sgr23 DistilBertEmbeddings from sgr23 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sgr23 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_sgr23` is an English model originally trained by sgr23.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sgr23_en_5.1.2_3.0_1694784756753.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sgr23_en_5.1.2_3.0_1694784756753.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sgr23","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sgr23", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sgr23| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sgr23/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_en.md new file mode 100644 index 00000000000000..6ca1c0b309f589 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41 DistilBertEmbeddings from ShadowTwin41 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41` is an English model originally trained by ShadowTwin41.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_en_5.1.2_3.0_1694788124987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41_en_5.1.2_3.0_1694788124987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_shadowtwin41| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ShadowTwin41/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_soduhh_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_soduhh_en.md new file mode 100644 index 00000000000000..ba93a526c82f02 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_soduhh_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_soduhh DistilBertEmbeddings from soduhh +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_soduhh +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_soduhh` is an English model originally trained by soduhh.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_soduhh_en_5.1.2_3.0_1694774128556.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_soduhh_en_5.1.2_3.0_1694774128556.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_soduhh","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_soduhh", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_soduhh| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/soduhh/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md new file mode 100644 index 00000000000000..b830706876afd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sofa566 DistilBertEmbeddings from sofa566 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sofa566 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_sofa566` is an English model originally trained by sofa566.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en_5.1.2_3.0_1694781170986.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sofa566_en_5.1.2_3.0_1694781170986.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sofa566","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sofa566", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sofa566| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/sofa566/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sonny_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sonny_en.md new file mode 100644 index 00000000000000..ed7d934754d984 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sonny_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sonny DistilBertEmbeddings from Sonny +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sonny +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_sonny` is an English model originally trained by Sonny.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sonny_en_5.1.2_3.0_1694779487811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sonny_en_5.1.2_3.0_1694779487811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sonny","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sonny", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sonny| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sonny/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sophon_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sophon_en.md new file mode 100644 index 00000000000000..8ae491e84cc761 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_sophon_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_sophon DistilBertEmbeddings from Sophon +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_sophon +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_sophon` is an English model originally trained by Sophon.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sophon_en_5.1.2_3.0_1694772344487.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_sophon_en_5.1.2_3.0_1694772344487.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sophon","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_sophon", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_sophon| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Sophon/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829_en.md new file mode 100644 index 00000000000000..b222b6dd23a283 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829 DistilBertEmbeddings from suzuki0829 +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829` is an English model originally trained by suzuki0829.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829_en_5.1.2_3.0_1694770510510.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829_en_5.1.2_3.0_1694770510510.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_suzuki0829| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/suzuki0829/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_thabet_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_thabet_en.md new file mode 100644 index 00000000000000..7a9acfa63cb213 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_thabet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_thabet DistilBertEmbeddings from Thabet +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_thabet +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_thabet` is an English model originally trained by Thabet.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_thabet_en_5.1.2_3.0_1694784973757.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_thabet_en_5.1.2_3.0_1694784973757.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_thabet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_thabet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_thabet| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Thabet/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa_en.md new file mode 100644 index 00000000000000..319879f624e735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa DistilBertEmbeddings from ysugawa +author: John Snow Labs +name: distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa` is an English model originally trained by ysugawa. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa_en_5.1.2_3.0_1694770212287.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa_en_5.1.2_3.0_1694770212287.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_squad_d5716d28_ysugawa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ysugawa/distilbert-base-uncased-finetuned-squad-d5716d28 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_test_headline_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_test_headline_en.md new file mode 100644 index 00000000000000..f01c79160c471c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_test_headline_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_test_headline DistilBertEmbeddings from lucypallent +author: John Snow Labs +name: distilbert_base_uncased_finetuned_test_headline +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_test_headline` is an English model originally trained by lucypallent. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_test_headline_en_5.1.2_3.0_1694790033469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_test_headline_en_5.1.2_3.0_1694790033469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_test_headline","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_test_headline", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_test_headline| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lucypallent/distilbert-base-uncased-finetuned-test-headline \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_tweet_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_tweet_en.md new file mode 100644 index 00000000000000..cb54b422676481 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_tweet_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_tweet DistilBertEmbeddings from shreyasdatar +author: John Snow Labs +name: distilbert_base_uncased_finetuned_tweet +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_tweet` is an English model originally trained by shreyasdatar. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_tweet_en_5.1.2_3.0_1694783838766.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_tweet_en_5.1.2_3.0_1694783838766.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_tweet","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_tweet", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_tweet| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/shreyasdatar/distilbert-base-uncased-finetuned-tweet \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_vk_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_vk_en.md new file mode 100644 index 00000000000000..b27a7654af3211 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_vk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_vk DistilBertEmbeddings from bruhwalkk +author: John Snow Labs +name: distilbert_base_uncased_finetuned_vk +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_vk` is an English model originally trained by bruhwalkk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_vk_en_5.1.2_3.0_1694780625605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_vk_en_5.1.2_3.0_1694780625605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_vk","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_vk", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_vk| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.0 MB| + +## References + +https://huggingface.co/bruhwalkk/distilbert-base-uncased-finetuned-vk \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_wb_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_wb_en.md new file mode 100644 index 00000000000000..d2e6b25744d635 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_wb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_wb DistilBertEmbeddings from alexskrn +author: John Snow Labs +name: distilbert_base_uncased_finetuned_wb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_wb` is an English model originally trained by alexskrn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_wb_en_5.1.2_3.0_1694782258713.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_wb_en_5.1.2_3.0_1694782258713.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_wb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_wb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_wb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/alexskrn/distilbert-base-uncased-finetuned-wb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_wos_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_wos_en.md new file mode 100644 index 00000000000000..c6ede77dc6af52 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_finetuned_wos_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_finetuned_wos DistilBertEmbeddings from alvin-wen +author: John Snow Labs +name: distilbert_base_uncased_finetuned_wos +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_finetuned_wos` is an English model originally trained by alvin-wen. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_wos_en_5.1.2_3.0_1694782853873.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_finetuned_wos_en_5.1.2_3.0_1694782853873.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_finetuned_wos","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_finetuned_wos", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_finetuned_wos| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/alvin-wen/distilbert-base-uncased-finetuned-wos \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_holocaust_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_holocaust_en.md new file mode 100644 index 00000000000000..39a447f9a36385 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_holocaust_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_holocaust DistilBertEmbeddings from wjbmattingly +author: John Snow Labs +name: distilbert_base_uncased_holocaust +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_holocaust` is an English model originally trained by wjbmattingly. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_holocaust_en_5.1.2_3.0_1694786616427.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_holocaust_en_5.1.2_3.0_1694786616427.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_holocaust","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_holocaust", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_holocaust| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/wjbmattingly/distilbert-base-uncased-holocaust \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_accelerate_en.md new file mode 100644 index 00000000000000..13a0359616d1d0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_imdb_accelerate DistilBertEmbeddings from hieule +author: John Snow Labs +name: distilbert_base_uncased_imdb_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_imdb_accelerate` is an English model originally trained by hieule. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_imdb_accelerate_en_5.1.2_3.0_1694784667459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_imdb_accelerate_en_5.1.2_3.0_1694784667459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_imdb_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_imdb_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_imdb_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/hieule/distilbert-base-uncased-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_disbert1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_disbert1_en.md new file mode 100644 index 00000000000000..698d45123a8c62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_disbert1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_imdb_disbert1 DistilBertEmbeddings from Billwzl +author: John Snow Labs +name: distilbert_base_uncased_imdb_disbert1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_imdb_disbert1` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_imdb_disbert1_en_5.1.2_3.0_1694783463877.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_imdb_disbert1_en_5.1.2_3.0_1694783463877.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_imdb_disbert1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_imdb_disbert1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_imdb_disbert1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.0 MB| + +## References + +https://huggingface.co/Billwzl/distilbert-base-uncased-IMDB_disbert1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_distilbert_en.md new file mode 100644 index 00000000000000..02caca63fcd510 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_imdb_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_imdb_distilbert DistilBertEmbeddings from Billwzl +author: John Snow Labs +name: distilbert_base_uncased_imdb_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_imdb_distilbert` is an English model originally trained by Billwzl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_imdb_distilbert_en_5.1.2_3.0_1694782933769.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_imdb_distilbert_en_5.1.2_3.0_1694782933769.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_imdb_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_imdb_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_imdb_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Billwzl/distilbert-base-uncased-IMDB_distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_issues_128_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_issues_128_en.md new file mode 100644 index 00000000000000..c20f2d91d20f46 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_issues_128_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_issues_128 DistilBertEmbeddings from Chrispfield +author: John Snow Labs +name: distilbert_base_uncased_issues_128 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_issues_128` is an English model originally trained by Chrispfield. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_issues_128_en_5.1.2_3.0_1694784972786.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_issues_128_en_5.1.2_3.0_1694784972786.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_issues_128","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_issues_128", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_issues_128| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Chrispfield/distilbert-base-uncased-issues-128 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_linkedin_domain_adaptation_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_linkedin_domain_adaptation_en.md new file mode 100644 index 00000000000000..d27eeedeb45238 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_linkedin_domain_adaptation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_linkedin_domain_adaptation DistilBertEmbeddings from algiraldohe +author: John Snow Labs +name: distilbert_base_uncased_linkedin_domain_adaptation +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_linkedin_domain_adaptation` is an English model originally trained by algiraldohe. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_linkedin_domain_adaptation_en_5.1.2_3.0_1694772620399.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_linkedin_domain_adaptation_en_5.1.2_3.0_1694772620399.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_linkedin_domain_adaptation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_linkedin_domain_adaptation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_linkedin_domain_adaptation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/algiraldohe/distilbert-base-uncased-linkedin-domain-adaptation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_malayalam_arxiv_papers_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_malayalam_arxiv_papers_en.md new file mode 100644 index 00000000000000..4348333d106e19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_malayalam_arxiv_papers_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_malayalam_arxiv_papers DistilBertEmbeddings from aalksii +author: John Snow Labs +name: distilbert_base_uncased_malayalam_arxiv_papers +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_malayalam_arxiv_papers` is an English model originally trained by aalksii. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_malayalam_arxiv_papers_en_5.1.2_3.0_1694783989229.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_malayalam_arxiv_papers_en_5.1.2_3.0_1694783989229.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_malayalam_arxiv_papers","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_malayalam_arxiv_papers", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_malayalam_arxiv_papers| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/aalksii/distilbert-base-uncased-ml-arxiv-papers \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mask_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mask_accelerate_en.md new file mode 100644 index 00000000000000..5e083c5e694835 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mask_accelerate_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_mask_accelerate DistilBertEmbeddings from xiannvhh +author: John Snow Labs +name: distilbert_base_uncased_mask_accelerate +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_mask_accelerate` is an English model originally trained by xiannvhh. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mask_accelerate_en_5.1.2_3.0_1694785625253.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mask_accelerate_en_5.1.2_3.0_1694785625253.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_mask_accelerate","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_mask_accelerate", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_mask_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/xiannvhh/distilbert-base-uncased-mask-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mask_finetuned_imdb_v1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mask_finetuned_imdb_v1_en.md new file mode 100644 index 00000000000000..c88a0d0e3f6866 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mask_finetuned_imdb_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_mask_finetuned_imdb_v1 DistilBertEmbeddings from kaiku03 +author: John Snow Labs +name: distilbert_base_uncased_mask_finetuned_imdb_v1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_mask_finetuned_imdb_v1` is an English model originally trained by kaiku03. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mask_finetuned_imdb_v1_en_5.1.2_3.0_1694788413645.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mask_finetuned_imdb_v1_en_5.1.2_3.0_1694788413645.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_mask_finetuned_imdb_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_mask_finetuned_imdb_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_mask_finetuned_imdb_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/kaiku03/distilbert-base-uncased-mask-finetuned-imdb_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mlm_scirepeval_fos_chemistry_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mlm_scirepeval_fos_chemistry_en.md new file mode 100644 index 00000000000000..990cf1b3846257 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mlm_scirepeval_fos_chemistry_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_mlm_scirepeval_fos_chemistry DistilBertEmbeddings from jonas-luehrs +author: John Snow Labs +name: distilbert_base_uncased_mlm_scirepeval_fos_chemistry +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_mlm_scirepeval_fos_chemistry` is an English model originally trained by jonas-luehrs. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mlm_scirepeval_fos_chemistry_en_5.1.2_3.0_1694784976059.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mlm_scirepeval_fos_chemistry_en_5.1.2_3.0_1694784976059.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_mlm_scirepeval_fos_chemistry","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_mlm_scirepeval_fos_chemistry", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_mlm_scirepeval_fos_chemistry| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jonas-luehrs/distilbert-base-uncased-MLM-scirepeval_fos_chemistry \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mlm_tamil_local_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mlm_tamil_local_en.md new file mode 100644 index 00000000000000..1a86dcf2ea1831 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_mlm_tamil_local_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_mlm_tamil_local DistilBertEmbeddings from medhabi +author: John Snow Labs +name: distilbert_base_uncased_mlm_tamil_local +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_mlm_tamil_local` is an English model originally trained by medhabi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mlm_tamil_local_en_5.1.2_3.0_1694788852384.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_mlm_tamil_local_en_5.1.2_3.0_1694788852384.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_mlm_tamil_local","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_mlm_tamil_local", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_mlm_tamil_local| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/medhabi/distilbert-base-uncased-mlm-ta-local \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_scratch_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_scratch_en.md new file mode 100644 index 00000000000000..94c2c4fa2f0145 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_scratch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_scratch DistilBertEmbeddings from hieule +author: John Snow Labs +name: distilbert_base_uncased_scratch +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_scratch` is an English model originally trained by hieule. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_scratch_en_5.1.2_3.0_1694784563353.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_scratch_en_5.1.2_3.0_1694784563353.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_scratch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_scratch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_scratch| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/hieule/distilbert-base-uncased-scratch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_80_1x4_block_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_80_1x4_block_pruneofa_en.md new file mode 100644 index 00000000000000..d42b5dddc0ba91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_80_1x4_block_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_sparse_80_1x4_block_pruneofa DistilBertEmbeddings from Intel +author: John Snow Labs +name: distilbert_base_uncased_sparse_80_1x4_block_pruneofa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_sparse_80_1x4_block_pruneofa` is an English model originally trained by Intel. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_sparse_80_1x4_block_pruneofa_en_5.1.2_3.0_1694785612750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_sparse_80_1x4_block_pruneofa_en_5.1.2_3.0_1694785612750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_sparse_80_1x4_block_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_sparse_80_1x4_block_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_sparse_80_1x4_block_pruneofa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|136.5 MB| + +## References + +https://huggingface.co/Intel/distilbert-base-uncased-sparse-80-1x4-block-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_85_unstructured_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_85_unstructured_pruneofa_en.md new file mode 100644 index 00000000000000..a02bdabd658dce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_85_unstructured_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_sparse_85_unstructured_pruneofa DistilBertEmbeddings from Intel +author: John Snow Labs +name: distilbert_base_uncased_sparse_85_unstructured_pruneofa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_sparse_85_unstructured_pruneofa` is an English model originally trained by Intel. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_sparse_85_unstructured_pruneofa_en_5.1.2_3.0_1694778491947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_sparse_85_unstructured_pruneofa_en_5.1.2_3.0_1694778491947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_sparse_85_unstructured_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_sparse_85_unstructured_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_sparse_85_unstructured_pruneofa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|132.5 MB| + +## References + +https://huggingface.co/Intel/distilbert-base-uncased-sparse-85-unstructured-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_90_unstructured_pruneofa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_90_unstructured_pruneofa_en.md new file mode 100644 index 00000000000000..5a59934e1cf1d8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_sparse_90_unstructured_pruneofa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_sparse_90_unstructured_pruneofa DistilBertEmbeddings from Intel +author: John Snow Labs +name: distilbert_base_uncased_sparse_90_unstructured_pruneofa +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_sparse_90_unstructured_pruneofa` is an English model originally trained by Intel. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_sparse_90_unstructured_pruneofa_en_5.1.2_3.0_1694778619456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_sparse_90_unstructured_pruneofa_en_5.1.2_3.0_1694778619456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_sparse_90_unstructured_pruneofa","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_sparse_90_unstructured_pruneofa", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_sparse_90_unstructured_pruneofa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|123.3 MB| + +## References + +https://huggingface.co/Intel/distilbert-base-uncased-sparse-90-unstructured-pruneofa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_wholewordmasking_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_wholewordmasking_finetuned_imdb_en.md new file mode 100644 index 00000000000000..809064db15d5be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_base_uncased_wholewordmasking_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_base_uncased_wholewordmasking_finetuned_imdb DistilBertEmbeddings from VanHoan +author: John Snow Labs +name: distilbert_base_uncased_wholewordmasking_finetuned_imdb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_base_uncased_wholewordmasking_finetuned_imdb` is an English model originally trained by VanHoan. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_wholewordmasking_finetuned_imdb_en_5.1.2_3.0_1694784099518.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_base_uncased_wholewordmasking_finetuned_imdb_en_5.1.2_3.0_1694784099518.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_base_uncased_wholewordmasking_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_base_uncased_wholewordmasking_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_base_uncased_wholewordmasking_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/VanHoan/distilbert-base-uncased-WholeWordMasking-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_classification_eplorer_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_classification_eplorer_en.md new file mode 100644 index 00000000000000..26a254a4a3a7b5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_classification_eplorer_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_classification_eplorer DistilBertEmbeddings from edanigoben +author: John Snow Labs +name: distilbert_classification_eplorer +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_classification_eplorer` is an English model originally trained by edanigoben. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_classification_eplorer_en_5.1.2_3.0_1694778754715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_classification_eplorer_en_5.1.2_3.0_1694778754715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_classification_eplorer","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_classification_eplorer", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_classification_eplorer| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/edanigoben/distilbert-classification-eplorer \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_cmc_a8_h512_l4_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_cmc_a8_h512_l4_en.md new file mode 100644 index 00000000000000..744dc76b096790 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_cmc_a8_h512_l4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_cmc_a8_h512_l4 DistilBertEmbeddings from levuloihust +author: John Snow Labs +name: distilbert_cmc_a8_h512_l4 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_cmc_a8_h512_l4` is an English model originally trained by levuloihust. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_cmc_a8_h512_l4_en_5.1.2_3.0_1694782904245.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_cmc_a8_h512_l4_en_5.1.2_3.0_1694782904245.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_cmc_a8_h512_l4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_cmc_a8_h512_l4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_cmc_a8_h512_l4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|167.9 MB| + +## References + +https://huggingface.co/levuloihust/distilbert-cmc-A8-H512-L4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_domain_adapted_ecomm_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_domain_adapted_ecomm_en.md new file mode 100644 index 00000000000000..ecfa2e4d18f441 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_domain_adapted_ecomm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_domain_adapted_ecomm DistilBertEmbeddings from Kk2k +author: John Snow Labs +name: distilbert_domain_adapted_ecomm +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_domain_adapted_ecomm` is an English model originally trained by Kk2k. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_domain_adapted_ecomm_en_5.1.2_3.0_1694783099567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_domain_adapted_ecomm_en_5.1.2_3.0_1694783099567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_domain_adapted_ecomm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_domain_adapted_ecomm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_domain_adapted_ecomm| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/Kk2k/Distilbert_domain_adapted_ecomm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_base_uncased_continued_training_medqa_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_base_uncased_continued_training_medqa_en.md new file mode 100644 index 00000000000000..886624345f6f39 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_base_uncased_continued_training_medqa_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English Bert Embeddings model (from Shaier) +author: John Snow Labs +name: distilbert_embeddings_base_uncased_continued_training_medqa +date: 2023-09-15 +tags: [open_source, distilbert, distilbert_embeddings, distilbertformaskedlm, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForMaskedLM model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert-base-uncased-continued_training-medqa` is an English model originally trained by `Shaier`.
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_base_uncased_continued_training_medqa_en_5.1.2_3.0_1694777575763.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_base_uncased_continued_training_medqa_en_5.1.2_3.0_1694777575763.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_base_uncased_continued_training_medqa","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I love Spark-NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_base_uncased_continued_training_medqa","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I love Spark-NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_base_uncased_continued_training_medqa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| +|Case sensitive:|false| + +## References + +References + +https://huggingface.co/Shaier/distilbert-base-uncased-continued_training-medqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_base_uncased_finetuned_imdb_accelerate_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_base_uncased_finetuned_imdb_accelerate_en.md new file mode 100644 index 00000000000000..3d3eba092e03a4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_base_uncased_finetuned_imdb_accelerate_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English Bert Embeddings Cased model (from nrsmac) +author: John Snow Labs +name: distilbert_embeddings_base_uncased_finetuned_imdb_accelerate +date: 2023-09-15 +tags: [open_source, distilbert, distilbert_embeddings, distilbertformaskedlm, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForMaskedLM model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert-base-uncased-finetuned-imdb-accelerate` is an English model originally trained by `nrsmac`.
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_base_uncased_finetuned_imdb_accelerate_en_5.1.2_3.0_1694779961580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_base_uncased_finetuned_imdb_accelerate_en_5.1.2_3.0_1694779961580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_base_uncased_finetuned_imdb_accelerate","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I love Spark-NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_base_uncased_finetuned_imdb_accelerate","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I love Spark-NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_base_uncased_finetuned_imdb_accelerate| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| +|Case sensitive:|false| + +## References + +References + +https://huggingface.co/nrsmac/distilbert-base-uncased-finetuned-imdb-accelerate \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_bio_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_bio_cased_en.md new file mode 100644 index 00000000000000..9b96f3832a81fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_bio_cased_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English Bert Embeddings Cased model (from nlpie) +author: John Snow Labs +name: distilbert_embeddings_bio_cased +date: 2023-09-15 +tags: [open_source, distilbert, distilbert_embeddings, distilbertformaskedlm, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForMaskedLM model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio-distilbert-cased` is an English model originally trained by `nlpie`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_bio_cased_en_5.1.2_3.0_1694783708969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_bio_cased_en_5.1.2_3.0_1694783708969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_bio_cased","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I love Spark-NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_bio_cased","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I love Spark-NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_bio_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| +|Case sensitive:|false| + +## References + +References + +https://huggingface.co/nlpie/bio-distilbert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_bio_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_bio_uncased_en.md new file mode 100644 index 00000000000000..f692d7bf074a8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_bio_uncased_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English Bert Embeddings model (from nlpie) +author: John Snow Labs +name: distilbert_embeddings_bio_uncased +date: 2023-09-15 +tags: [open_source, distilbert, distilbert_embeddings, distilbertformaskedlm, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForMaskedLM model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `bio-distilbert-uncased` is an English model originally trained by `nlpie`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_bio_uncased_en_5.1.2_3.0_1694783869030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_bio_uncased_en_5.1.2_3.0_1694783869030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_bio_uncased","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I love Spark-NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_bio_uncased","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I love Spark-NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_bio_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| +|Case sensitive:|false| + +## References + +References + +https://huggingface.co/nlpie/bio-distilbert-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_clinical_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_clinical_en.md new file mode 100644 index 00000000000000..a0a9413550703a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_clinical_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English Bert Embeddings model (from nlpie) +author: John Snow Labs +name: distilbert_embeddings_clinical +date: 2023-09-15 +tags: [open_source, distilbert, distilbert_embeddings, distilbertformaskedlm, en, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertForMaskedLM model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `clinical-distilbert` is an English model originally trained by `nlpie`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_clinical_en_5.1.2_3.0_1694771806781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_clinical_en_5.1.2_3.0_1694771806781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +tokenizer = Tokenizer() \ + .setInputCols("document") \ + .setOutputCol("token") + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_clinical","en") \ + .setInputCols(["document", "token"]) \ + .setOutputCol("embeddings") \ + .setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, tokenizer, embeddings]) + +data = spark.createDataFrame([["I love Spark-NLP"]]).toDF("text") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val tokenizer = new Tokenizer() + .setInputCols("document") + .setOutputCol("token") + +val embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_clinical","en") + .setInputCols(Array("document", "token")) + .setOutputCol("embeddings") + .setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, tokenizer, embeddings)) + +val data = Seq("I love Spark-NLP").toDS.toDF("text") + +val result = pipeline.fit(data).transform(data) +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_clinical| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| +|Case sensitive:|false| + +## References + +References + +https://huggingface.co/nlpie/clinical-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_google_job_data_tuned_trial_8_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_google_job_data_tuned_trial_8_en.md new file mode 100644 index 00000000000000..1d3c9cefce2c49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_google_job_data_tuned_trial_8_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_embeddings_google_job_data_tuned_trial_8 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: distilbert_embeddings_google_job_data_tuned_trial_8 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_embeddings_google_job_data_tuned_trial_8` is an English model originally trained by EslamAhmed.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_google_job_data_tuned_trial_8_en_5.1.2_3.0_1694773297076.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_google_job_data_tuned_trial_8_en_5.1.2_3.0_1694773297076.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_google_job_data_tuned_trial_8","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_embeddings_google_job_data_tuned_trial_8", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_google_job_data_tuned_trial_8| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/google_Job_data_tuned_trial_8_11-2-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_we4lkd_aml_1921_2017_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_we4lkd_aml_1921_2017_en.md new file mode 100644 index 00000000000000..f869068d5dd6da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_embeddings_we4lkd_aml_1921_2017_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_embeddings_we4lkd_aml_1921_2017 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: distilbert_embeddings_we4lkd_aml_1921_2017 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_embeddings_we4lkd_aml_1921_2017` is an English model originally trained by matheusvolpon.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_we4lkd_aml_1921_2017_en_5.1.2_3.0_1694779750303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_embeddings_we4lkd_aml_1921_2017_en_5.1.2_3.0_1694779750303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distilbert_embeddings_we4lkd_aml_1921_2017","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_embeddings_we4lkd_aml_1921_2017", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_embeddings_we4lkd_aml_1921_2017| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2017 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_imdb_lyk0013_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_imdb_lyk0013_en.md new file mode 100644 index 00000000000000..0b85162c5f5726 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_imdb_lyk0013_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_finetuned_imdb_lyk0013 DistilBertEmbeddings from lyk0013 +author: John Snow Labs +name: distilbert_finetuned_imdb_lyk0013 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_imdb_lyk0013` is a English model originally trained by lyk0013. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_lyk0013_en_5.1.2_3.0_1694790297554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_lyk0013_en_5.1.2_3.0_1694790297554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_finetuned_imdb_lyk0013","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_finetuned_imdb_lyk0013", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_imdb_lyk0013| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/lyk0013/distilbert-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_imdb_tsabing_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_imdb_tsabing_en.md new file mode 100644 index 00000000000000..b35ecb267a931f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_imdb_tsabing_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_finetuned_imdb_tsabing DistilBertEmbeddings from Tsabing +author: John Snow Labs +name: distilbert_finetuned_imdb_tsabing +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_imdb_tsabing` is a English model originally trained by Tsabing. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_tsabing_en_5.1.2_3.0_1694789241169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_imdb_tsabing_en_5.1.2_3.0_1694789241169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_finetuned_imdb_tsabing","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_finetuned_imdb_tsabing", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_imdb_tsabing| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Tsabing/distilbert-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_spmlm_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_spmlm_en.md new file mode 100644 index 00000000000000..41b6b0a5006381 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_finetuned_spmlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_finetuned_spmlm DistilBertEmbeddings from ashwathjadhav23 +author: John Snow Labs +name: distilbert_finetuned_spmlm +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_finetuned_spmlm` is a English model originally trained by ashwathjadhav23. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_spmlm_en_5.1.2_3.0_1694773887957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_finetuned_spmlm_en_5.1.2_3.0_1694773887957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_finetuned_spmlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_finetuned_spmlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_finetuned_spmlm| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/ashwathjadhav23/DistilBert_Finetuned_SpMLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_french_explorer_classification_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_french_explorer_classification_en.md new file mode 100644 index 00000000000000..5d13d4d3aeccb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_french_explorer_classification_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_french_explorer_classification DistilBertEmbeddings from factored +author: John Snow Labs +name: distilbert_french_explorer_classification +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_french_explorer_classification` is a English model originally trained by factored. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_french_explorer_classification_en_5.1.2_3.0_1694779552028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_french_explorer_classification_en_5.1.2_3.0_1694779552028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_french_explorer_classification","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_french_explorer_classification", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_french_explorer_classification| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.1 MB| + +## References + +https://huggingface.co/factored/distilbert-fr-explorer-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_hemingway_sar_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_hemingway_sar_en.md new file mode 100644 index 00000000000000..c6f43bb5523c7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_hemingway_sar_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_hemingway_sar DistilBertEmbeddings from khazen2 +author: John Snow Labs +name: distilbert_hemingway_sar +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_hemingway_sar` is a English model originally trained by khazen2. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_hemingway_sar_en_5.1.2_3.0_1694786578751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_hemingway_sar_en_5.1.2_3.0_1694786578751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_hemingway_sar","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_hemingway_sar", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_hemingway_sar| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/khazen2/DistilBERT_Hemingway_SAR \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_hinglish_big_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_hinglish_big_en.md new file mode 100644 index 00000000000000..5b9789063aac23 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_hinglish_big_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_hinglish_big DistilBertEmbeddings from aditeyabaral +author: John Snow Labs +name: distilbert_hinglish_big +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_hinglish_big` is a English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_hinglish_big_en_5.1.2_3.0_1694779769717.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_hinglish_big_en_5.1.2_3.0_1694779769717.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_hinglish_big","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_hinglish_big", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_hinglish_big| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/aditeyabaral/distilbert-hinglish-big \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_hinglish_small_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_hinglish_small_en.md new file mode 100644 index 00000000000000..cb32a2ccc8ce17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_hinglish_small_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_hinglish_small DistilBertEmbeddings from aditeyabaral +author: John Snow Labs +name: distilbert_hinglish_small +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_hinglish_small` is a English model originally trained by aditeyabaral. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_hinglish_small_en_5.1.2_3.0_1694779874373.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_hinglish_small_en_5.1.2_3.0_1694779874373.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_hinglish_small","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_hinglish_small", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_hinglish_small| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/aditeyabaral/distilbert-hinglish-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_imdb_negative_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_imdb_negative_en.md new file mode 100644 index 00000000000000..aa56d258ad77af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_imdb_negative_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_imdb_negative DistilBertEmbeddings from michalwilk123 +author: John Snow Labs +name: distilbert_imdb_negative +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_negative` is a English model originally trained by michalwilk123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_negative_en_5.1.2_3.0_1694782498235.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_negative_en_5.1.2_3.0_1694782498235.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_imdb_negative","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_imdb_negative", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_negative| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/michalwilk123/distilbert-imdb-negative \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_imdb_positive_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_imdb_positive_en.md new file mode 100644 index 00000000000000..ca0bd79a1b7f8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_imdb_positive_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_imdb_positive DistilBertEmbeddings from michalwilk123 +author: John Snow Labs +name: distilbert_imdb_positive +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_imdb_positive` is a English model originally trained by michalwilk123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_imdb_positive_en_5.1.2_3.0_1694782618191.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_imdb_positive_en_5.1.2_3.0_1694782618191.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_imdb_positive","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_imdb_positive", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_imdb_positive| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/michalwilk123/distilbert-imdb-positive \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_1000k_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_1000k_en.md new file mode 100644 index 00000000000000..c19330a9e22c6f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_1000k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_mlm_1000k DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_mlm_1000k +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_mlm_1000k` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_1000k_en_5.1.2_3.0_1694788526784.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_1000k_en_5.1.2_3.0_1694788526784.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_mlm_1000k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_mlm_1000k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_1000k| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|246.8 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-mlm-1000k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_250k_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_250k_en.md new file mode 100644 index 00000000000000..17506d9d98ca7d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_250k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_mlm_250k DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_mlm_250k +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_mlm_250k` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_250k_en_5.1.2_3.0_1694788223580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_250k_en_5.1.2_3.0_1694788223580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("distilbert_mlm_250k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_mlm_250k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_250k| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.0 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-mlm-250k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_500k_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_500k_en.md new file mode 100644 index 00000000000000..09f5a95e3f3201 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_500k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_mlm_500k DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_mlm_500k +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`distilbert_mlm_500k` is a English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_500k_en_5.1.2_3.0_1694788324657.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_500k_en_5.1.2_3.0_1694788324657.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_mlm_500k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_mlm_500k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_500k| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|246.9 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-mlm-500k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_750k_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_750k_en.md new file mode 100644 index 00000000000000..dcc0f4d7c33da6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_750k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_mlm_750k DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_mlm_750k +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_mlm_750k` is an English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_750k_en_5.1.2_3.0_1694788420155.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_750k_en_5.1.2_3.0_1694788420155.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_mlm_750k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_mlm_750k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_750k| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|246.8 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-mlm-750k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_best_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_best_en.md new file mode 100644 index 00000000000000..1eaec4ce82c3fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_mlm_best_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_mlm_best DistilBertEmbeddings from vocab-transformers +author: John Snow Labs +name: distilbert_mlm_best +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_mlm_best` is an English model originally trained by vocab-transformers. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_mlm_best_en_5.1.2_3.0_1694788626661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_mlm_best_en_5.1.2_3.0_1694788626661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_mlm_best","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_mlm_best", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_mlm_best| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|246.9 MB| + +## References + +https://huggingface.co/vocab-transformers/distilbert-mlm-best \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_perigon_200k_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_perigon_200k_en.md new file mode 100644 index 00000000000000..6fcc0bad67de51 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_perigon_200k_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_perigon_200k DistilBertEmbeddings from judy93536 +author: John Snow Labs +name: distilbert_perigon_200k +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_perigon_200k` is an English model originally trained by judy93536. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_perigon_200k_en_5.1.2_3.0_1694785838237.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_perigon_200k_en_5.1.2_3.0_1694785838237.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_perigon_200k","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_perigon_200k", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_perigon_200k| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/judy93536/distilbert-perigon-200k \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_pubmed_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_pubmed_mlm_en.md new file mode 100644 index 00000000000000..9b38b3d88bb013 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_pubmed_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_pubmed_mlm DistilBertEmbeddings from Gaborandi +author: John Snow Labs +name: distilbert_pubmed_mlm +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_pubmed_mlm` is an English model originally trained by Gaborandi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_pubmed_mlm_en_5.1.2_3.0_1694775368835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_pubmed_mlm_en_5.1.2_3.0_1694775368835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_pubmed_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_pubmed_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_pubmed_mlm| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/Gaborandi/distilbert-pubmed-MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_ravenk_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_ravenk_en.md new file mode 100644 index 00000000000000..2db4d812e44890 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_ravenk_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_ravenk DistilBertEmbeddings from RavenK +author: John Snow Labs +name: distilbert_ravenk +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_ravenk` is an English model originally trained by RavenK. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_ravenk_en_5.1.2_3.0_1694786300721.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_ravenk_en_5.1.2_3.0_1694786300721.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_ravenk","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_ravenk", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_ravenk| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/RavenK/distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_sparsembed_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_sparsembed_en.md new file mode 100644 index 00000000000000..a7991ca7e186ab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_sparsembed_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_sparsembed DistilBertEmbeddings from raphaelsty +author: John Snow Labs +name: distilbert_sparsembed +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_sparsembed` is an English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_sparsembed_en_5.1.2_3.0_1694787885018.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_sparsembed_en_5.1.2_3.0_1694787885018.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_sparsembed","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_sparsembed", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_sparsembed| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/raphaelsty/distilbert-sparsembed \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_splade_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_splade_en.md new file mode 100644 index 00000000000000..938bd19e5035ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_splade_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_splade DistilBertEmbeddings from raphaelsty +author: John Snow Labs +name: distilbert_splade +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_splade` is an English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_splade_en_5.1.2_3.0_1694787786344.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_splade_en_5.1.2_3.0_1694787786344.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_splade","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_splade", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_splade| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/raphaelsty/distilbert-splade \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_ugiugi_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_ugiugi_en.md new file mode 100644 index 00000000000000..4ca0440e6b3c1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_ugiugi_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_ugiugi DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: distilbert_ugiugi +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_ugiugi` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_ugiugi_en_5.1.2_3.0_1694785503613.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_ugiugi_en_5.1.2_3.0_1694785503613.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_ugiugi","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_ugiugi", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_ugiugi| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ugiugi/distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distilbert_v1_en.md b/docs/_posts/ahmedlone127/2023-09-15-distilbert_v1_en.md new file mode 100644 index 00000000000000..53d82d34f6bb8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distilbert_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distilbert_v1 DistilBertEmbeddings from Amirosein +author: John Snow Labs +name: distilbert_v1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distilbert_v1` is an English model originally trained by Amirosein. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distilbert_v1_en_5.1.2_3.0_1694770244017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distilbert_v1_en_5.1.2_3.0_1694770244017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distilbert_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distilbert_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distilbert_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|276.0 MB| + +## References + +https://huggingface.co/Amirosein/distilbert_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distill_test_en.md b/docs/_posts/ahmedlone127/2023-09-15-distill_test_en.md new file mode 100644 index 00000000000000..b02c7b84a03c88 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distill_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distill_test DistilBertEmbeddings from domenicrosati +author: John Snow Labs +name: distill_test +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distill_test` is an English model originally trained by domenicrosati. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distill_test_en_5.1.2_3.0_1694778046009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distill_test_en_5.1.2_3.0_1694778046009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distill_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distill_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distill_test| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/domenicrosati/distill-test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distillbert_base_spanish_uncased_finetuned_imdb_en.md b/docs/_posts/ahmedlone127/2023-09-15-distillbert_base_spanish_uncased_finetuned_imdb_en.md new file mode 100644 index 00000000000000..67a6ad4c4cac72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distillbert_base_spanish_uncased_finetuned_imdb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distillbert_base_spanish_uncased_finetuned_imdb DistilBertEmbeddings from franfram +author: John Snow Labs +name: distillbert_base_spanish_uncased_finetuned_imdb +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distillbert_base_spanish_uncased_finetuned_imdb` is an English model originally trained by franfram. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_base_spanish_uncased_finetuned_imdb_en_5.1.2_3.0_1694790663206.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_base_spanish_uncased_finetuned_imdb_en_5.1.2_3.0_1694790663206.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column read by the embeddings stage + +embeddings = DistilBertEmbeddings.pretrained("distillbert_base_spanish_uncased_finetuned_imdb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column read by the embeddings stage + +val embeddings = DistilBertEmbeddings + .pretrained("distillbert_base_spanish_uncased_finetuned_imdb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_base_spanish_uncased_finetuned_imdb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/franfram/distillbert-base-spanish-uncased-finetuned-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-distillbert_base_spanish_uncased_finetuned_spanish_corpus_en.md b/docs/_posts/ahmedlone127/2023-09-15-distillbert_base_spanish_uncased_finetuned_spanish_corpus_en.md new file mode 100644 index 00000000000000..52ed7660530b68 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-distillbert_base_spanish_uncased_finetuned_spanish_corpus_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English distillbert_base_spanish_uncased_finetuned_spanish_corpus DistilBertEmbeddings from franfram +author: John Snow Labs +name: distillbert_base_spanish_uncased_finetuned_spanish_corpus +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `distillbert_base_spanish_uncased_finetuned_spanish_corpus` is an English model originally trained by franfram.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/distillbert_base_spanish_uncased_finetuned_spanish_corpus_en_5.1.2_3.0_1694769979577.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/distillbert_base_spanish_uncased_finetuned_spanish_corpus_en_5.1.2_3.0_1694769979577.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("distillbert_base_spanish_uncased_finetuned_spanish_corpus","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("distillbert_base_spanish_uncased_finetuned_spanish_corpus", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|distillbert_base_spanish_uncased_finetuned_spanish_corpus| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.2 MB| + +## References + +https://huggingface.co/franfram/distillbert-base-spanish-uncased-finetuned-spanish-corpus \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-domain_adapted_arbert_goudma_en.md b/docs/_posts/ahmedlone127/2023-09-15-domain_adapted_arbert_goudma_en.md new file mode 100644 index 00000000000000..6f85457b85df97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-domain_adapted_arbert_goudma_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English domain_adapted_arbert_goudma DistilBertEmbeddings from YassineToughrai +author: John Snow Labs +name: domain_adapted_arbert_goudma +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `domain_adapted_arbert_goudma` is an English model originally trained by YassineToughrai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/domain_adapted_arbert_goudma_en_5.1.2_3.0_1694782061367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/domain_adapted_arbert_goudma_en_5.1.2_3.0_1694782061367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("domain_adapted_arbert_goudma","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("domain_adapted_arbert_goudma", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|domain_adapted_arbert_goudma| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|606.8 MB| + +## References + +https://huggingface.co/YassineToughrai/Domain_adapted_ARBERT_GOUDMA \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-dummy_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-dummy_model_en.md new file mode 100644 index 00000000000000..637b335a6645ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-dummy_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English dummy_model DistilBertEmbeddings from luoweijie +author: John Snow Labs +name: dummy_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `dummy_model` is an English model originally trained by luoweijie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/dummy_model_en_5.1.2_3.0_1694781796190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/dummy_model_en_5.1.2_3.0_1694781796190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("dummy_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("dummy_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|dummy_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/luoweijie/dummy-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-e4a_covid_distilbert_base_romanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-e4a_covid_distilbert_base_romanian_cased_en.md new file mode 100644 index 00000000000000..03a4e43ee86fcb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-e4a_covid_distilbert_base_romanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English e4a_covid_distilbert_base_romanian_cased DistilBertEmbeddings from racai +author: John Snow Labs +name: e4a_covid_distilbert_base_romanian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e4a_covid_distilbert_base_romanian_cased` is an English model originally trained by racai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e4a_covid_distilbert_base_romanian_cased_en_5.1.2_3.0_1694786227826.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e4a_covid_distilbert_base_romanian_cased_en_5.1.2_3.0_1694786227826.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("e4a_covid_distilbert_base_romanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("e4a_covid_distilbert_base_romanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e4a_covid_distilbert_base_romanian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|305.9 MB| + +## References + +https://huggingface.co/racai/e4a-covid-distilbert-base-romanian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-e4a_permits_distilbert_base_romanian_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-e4a_permits_distilbert_base_romanian_cased_en.md new file mode 100644 index 00000000000000..f223499ccfa25d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-e4a_permits_distilbert_base_romanian_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English e4a_permits_distilbert_base_romanian_cased DistilBertEmbeddings from racai +author: John Snow Labs +name: e4a_permits_distilbert_base_romanian_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `e4a_permits_distilbert_base_romanian_cased` is an English model originally trained by racai. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/e4a_permits_distilbert_base_romanian_cased_en_5.1.2_3.0_1694786350915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/e4a_permits_distilbert_base_romanian_cased_en_5.1.2_3.0_1694786350915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("e4a_permits_distilbert_base_romanian_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("e4a_permits_distilbert_base_romanian_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|e4a_permits_distilbert_base_romanian_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|305.9 MB| + +## References + +https://huggingface.co/racai/e4a-permits-distilbert-base-romanian-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_v_large_doc_en.md b/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_v_large_doc_en.md new file mode 100644 index 00000000000000..477278a7117682 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_v_large_doc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English efficient_splade_v_large_doc DistilBertEmbeddings from naver +author: John Snow Labs +name: efficient_splade_v_large_doc +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `efficient_splade_v_large_doc` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_splade_v_large_doc_en_5.1.2_3.0_1694778590163.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_splade_v_large_doc_en_5.1.2_3.0_1694778590163.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("efficient_splade_v_large_doc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("efficient_splade_v_large_doc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_splade_v_large_doc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/naver/efficient-splade-V-large-doc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_v_large_query_en.md b/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_v_large_query_en.md new file mode 100644 index 00000000000000..bfc881dd2f0211 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_v_large_query_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English efficient_splade_v_large_query DistilBertEmbeddings from naver +author: John Snow Labs +name: efficient_splade_v_large_query +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `efficient_splade_v_large_query` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_splade_v_large_query_en_5.1.2_3.0_1694778680533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_splade_v_large_query_en_5.1.2_3.0_1694778680533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("efficient_splade_v_large_query","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("efficient_splade_v_large_query", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_splade_v_large_query| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/naver/efficient-splade-V-large-query \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_vi_bt_large_doc_en.md b/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_vi_bt_large_doc_en.md new file mode 100644 index 00000000000000..12c67a2c71b8d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-efficient_splade_vi_bt_large_doc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English efficient_splade_vi_bt_large_doc DistilBertEmbeddings from naver +author: John Snow Labs +name: efficient_splade_vi_bt_large_doc +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `efficient_splade_vi_bt_large_doc` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/efficient_splade_vi_bt_large_doc_en_5.1.2_3.0_1694778781866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/efficient_splade_vi_bt_large_doc_en_5.1.2_3.0_1694778781866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("efficient_splade_vi_bt_large_doc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("efficient_splade_vi_bt_large_doc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|efficient_splade_vi_bt_large_doc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/naver/efficient-splade-VI-BT-large-doc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-eighteenth_century_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-eighteenth_century_distilbert_en.md new file mode 100644 index 00000000000000..28896709b6a7aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-eighteenth_century_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English eighteenth_century_distilbert DistilBertEmbeddings from davanstrien +author: John Snow Labs +name: eighteenth_century_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `eighteenth_century_distilbert` is an English model originally trained by davanstrien. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/eighteenth_century_distilbert_en_5.1.2_3.0_1694780486021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/eighteenth_century_distilbert_en_5.1.2_3.0_1694780486021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("eighteenth_century_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("eighteenth_century_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|eighteenth_century_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.4 MB| + +## References + +https://huggingface.co/davanstrien/eighteenth-century-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-erwt_year_en.md b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_en.md new file mode 100644 index 00000000000000..3c4d09ca41735d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English erwt_year DistilBertEmbeddings from Livingwithmachines +author: John Snow Labs +name: erwt_year +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `erwt_year` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/erwt_year_en_5.1.2_3.0_1694780244581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/erwt_year_en_5.1.2_3.0_1694780244581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("erwt_year","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("erwt_year", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|erwt_year| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Livingwithmachines/erwt-year \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-erwt_year_masked_25_en.md b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_masked_25_en.md new file mode 100644 index 00000000000000..06371f700c903b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_masked_25_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English erwt_year_masked_25 DistilBertEmbeddings from Livingwithmachines +author: John Snow Labs +name: erwt_year_masked_25 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `erwt_year_masked_25` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/erwt_year_masked_25_en_5.1.2_3.0_1694780473232.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/erwt_year_masked_25_en_5.1.2_3.0_1694780473232.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("erwt_year_masked_25","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("erwt_year_masked_25", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|erwt_year_masked_25| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Livingwithmachines/erwt-year-masked-25 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-erwt_year_masked_75_en.md b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_masked_75_en.md new file mode 100644 index 00000000000000..8c8bc839960829 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_masked_75_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English erwt_year_masked_75 DistilBertEmbeddings from Livingwithmachines +author: John Snow Labs +name: erwt_year_masked_75 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `erwt_year_masked_75` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/erwt_year_masked_75_en_5.1.2_3.0_1694780617608.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/erwt_year_masked_75_en_5.1.2_3.0_1694780617608.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("erwt_year_masked_75","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("erwt_year_masked_75", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|erwt_year_masked_75| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Livingwithmachines/erwt-year-masked-75 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-erwt_year_southern_sotho_en.md b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_southern_sotho_en.md new file mode 100644 index 00000000000000..bf355fe105aac2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-erwt_year_southern_sotho_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English erwt_year_southern_sotho DistilBertEmbeddings from Livingwithmachines +author: John Snow Labs +name: erwt_year_southern_sotho +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `erwt_year_southern_sotho` is an English model originally trained by Livingwithmachines. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/erwt_year_southern_sotho_en_5.1.2_3.0_1694780351987.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/erwt_year_southern_sotho_en_5.1.2_3.0_1694780351987.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("erwt_year_southern_sotho","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("erwt_year_southern_sotho", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|erwt_year_southern_sotho| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.4 MB| + +## References + +https://huggingface.co/Livingwithmachines/erwt-year-st \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-experiment_en.md b/docs/_posts/ahmedlone127/2023-09-15-experiment_en.md new file mode 100644 index 00000000000000..b8c55b9fd506fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-experiment_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English experiment DistilBertEmbeddings from apl33 +author: John Snow Labs +name: experiment +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `experiment` is an English model originally trained by apl33. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/experiment_en_5.1.2_3.0_1694779805982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/experiment_en_5.1.2_3.0_1694779805982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("experiment","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("experiment", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|experiment| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/apl33/experiment \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-few_mask_en.md b/docs/_posts/ahmedlone127/2023-09-15-few_mask_en.md new file mode 100644 index 00000000000000..ce492ff554f657 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-few_mask_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English few_mask DistilBertEmbeddings from Ondiet +author: John Snow Labs +name: few_mask +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `few_mask` is an English model originally trained by Ondiet. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/few_mask_en_5.1.2_3.0_1694788941452.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/few_mask_en_5.1.2_3.0_1694788941452.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("few_mask","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("few_mask", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|few_mask| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Ondiet/few_mask \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-film20000distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-film20000distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..8cc80dd7d0cead --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-film20000distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film20000distilbert_base_uncased DistilBertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film20000distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film20000distilbert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film20000distilbert_base_uncased_en_5.1.2_3.0_1694774514582.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film20000distilbert_base_uncased_en_5.1.2_3.0_1694774514582.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("film20000distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("film20000distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film20000distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film20000distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-film20000film20000distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-film20000film20000distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..cae359671ffaae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-film20000film20000distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film20000film20000distilbert_base_uncased DistilBertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film20000film20000distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film20000film20000distilbert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film20000film20000distilbert_base_uncased_en_5.1.2_3.0_1694775693003.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film20000film20000distilbert_base_uncased_en_5.1.2_3.0_1694775693003.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("film20000film20000distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("film20000film20000distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film20000film20000distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film20000film20000distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-film95000distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-film95000distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..1a617a1bdd3510 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-film95000distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film95000distilbert_base_uncased DistilBertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film95000distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film95000distilbert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film95000distilbert_base_uncased_en_5.1.2_3.0_1694776512015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film95000distilbert_base_uncased_en_5.1.2_3.0_1694776512015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("film95000distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("film95000distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film95000distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film95000distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-film98984distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-film98984distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..2d5bbcbe0fd425 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-film98984distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English film98984distilbert_base_uncased DistilBertEmbeddings from AmaiaSolaun +author: John Snow Labs +name: film98984distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `film98984distilbert_base_uncased` is an English model originally trained by AmaiaSolaun. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/film98984distilbert_base_uncased_en_5.1.2_3.0_1694775585695.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/film98984distilbert_base_uncased_en_5.1.2_3.0_1694775585695.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("film98984distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("film98984distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|film98984distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/AmaiaSolaun/film98984distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-fine_tuned_distilbert_nosql_injection_en.md b/docs/_posts/ahmedlone127/2023-09-15-fine_tuned_distilbert_nosql_injection_en.md new file mode 100644 index 00000000000000..4f609c5a04c68a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-fine_tuned_distilbert_nosql_injection_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English fine_tuned_distilbert_nosql_injection DistilBertEmbeddings from ankush-003 +author: John Snow Labs +name: fine_tuned_distilbert_nosql_injection +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `fine_tuned_distilbert_nosql_injection` is an English model originally trained by ankush-003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_nosql_injection_en_5.1.2_3.0_1694775832410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/fine_tuned_distilbert_nosql_injection_en_5.1.2_3.0_1694775832410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("fine_tuned_distilbert_nosql_injection","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("fine_tuned_distilbert_nosql_injection", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|fine_tuned_distilbert_nosql_injection| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/ankush-003/fine-tuned-distilbert-nosql-injection \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-first_try_4_en.md b/docs/_posts/ahmedlone127/2023-09-15-first_try_4_en.md new file mode 100644 index 00000000000000..a26be8582a30fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-first_try_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English first_try_4 DistilBertEmbeddings from disanda +author: John Snow Labs +name: first_try_4 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `first_try_4` is an English model originally trained by disanda. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/first_try_4_en_5.1.2_3.0_1694773416430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/first_try_4_en_5.1.2_3.0_1694773416430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("first_try_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("first_try_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|first_try_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/disanda/first_try_4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-flang_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-flang_distilbert_en.md new file mode 100644 index 00000000000000..f6ef0f490e70cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-flang_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English flang_distilbert DistilBertEmbeddings from SALT-NLP +author: John Snow Labs +name: flang_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `flang_distilbert` is an English model originally trained by SALT-NLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/flang_distilbert_en_5.1.2_3.0_1694777676228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/flang_distilbert_en_5.1.2_3.0_1694777676228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("flang_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("flang_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|flang_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SALT-NLP/FLANG-DistilBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-german_poetry_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-german_poetry_distilbert_en.md new file mode 100644 index 00000000000000..8d51fc0905638c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-german_poetry_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English german_poetry_distilbert DistilBertEmbeddings from Anjoe +author: John Snow Labs +name: german_poetry_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `german_poetry_distilbert` is an English model originally trained by Anjoe. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/german_poetry_distilbert_en_5.1.2_3.0_1694774320800.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/german_poetry_distilbert_en_5.1.2_3.0_1694774320800.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("german_poetry_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("german_poetry_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|german_poetry_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|250.3 MB| + +## References + +https://huggingface.co/Anjoe/german-poetry-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-google_Job_data_tuned_trial_2_11_2_2022_en.md b/docs/_posts/ahmedlone127/2023-09-15-google_Job_data_tuned_trial_2_11_2_2022_en.md new file mode 100644 index 00000000000000..53f8d0bd36e848 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-google_Job_data_tuned_trial_2_11_2_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English google_Job_data_tuned_trial_2_11_2_2022 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: google_Job_data_tuned_trial_2_11_2_2022 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `google_Job_data_tuned_trial_2_11_2_2022` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/google_Job_data_tuned_trial_2_11_2_2022_en_5.1.2_3.0_1694736099439.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/google_Job_data_tuned_trial_2_11_2_2022_en_5.1.2_3.0_1694736099439.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("google_Job_data_tuned_trial_2_11_2_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("google_Job_data_tuned_trial_2_11_2_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|google_Job_data_tuned_trial_2_11_2_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/google_Job_data_tuned_trial_2_11-2-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-google_job_data_tuned_trial_1_en.md b/docs/_posts/ahmedlone127/2023-09-15-google_job_data_tuned_trial_1_en.md new file mode 100644 index 00000000000000..6a6806eaa9bf20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-google_job_data_tuned_trial_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English google_job_data_tuned_trial_1 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: google_job_data_tuned_trial_1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `google_job_data_tuned_trial_1` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/google_job_data_tuned_trial_1_en_5.1.2_3.0_1694770671424.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/google_job_data_tuned_trial_1_en_5.1.2_3.0_1694770671424.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("google_job_data_tuned_trial_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("google_job_data_tuned_trial_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|google_job_data_tuned_trial_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/google_Job_data_tuned_trial_1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-google_job_data_tuned_trial_2_11_2_2022_en.md b/docs/_posts/ahmedlone127/2023-09-15-google_job_data_tuned_trial_2_11_2_2022_en.md new file mode 100644 index 00000000000000..09bd2241a5d469 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-google_job_data_tuned_trial_2_11_2_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English google_job_data_tuned_trial_2_11_2_2022 DistilBertEmbeddings from EslamAhmed +author: John Snow Labs +name: google_job_data_tuned_trial_2_11_2_2022 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `google_job_data_tuned_trial_2_11_2_2022` is an English model originally trained by EslamAhmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/google_job_data_tuned_trial_2_11_2_2022_en_5.1.2_3.0_1694772782812.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/google_job_data_tuned_trial_2_11_2_2022_en_5.1.2_3.0_1694772782812.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("google_job_data_tuned_trial_2_11_2_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("google_job_data_tuned_trial_2_11_2_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|google_job_data_tuned_trial_2_11_2_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|402.3 MB| + +## References + +https://huggingface.co/EslamAhmed/google_Job_data_tuned_trial_2_11-2-2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-hf_distilbert_imdb_mlm_cosine_en.md b/docs/_posts/ahmedlone127/2023-09-15-hf_distilbert_imdb_mlm_cosine_en.md new file mode 100644 index 00000000000000..321a26b9575858 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-hf_distilbert_imdb_mlm_cosine_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm_cosine DistilBertEmbeddings from nos1de +author: John Snow Labs +name: hf_distilbert_imdb_mlm_cosine +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hf_distilbert_imdb_mlm_cosine` is an English model originally trained by nos1de. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_en_5.1.2_3.0_1694769976827.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_cosine_en_5.1.2_3.0_1694769976827.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("hf_distilbert_imdb_mlm_cosine","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("hf_distilbert_imdb_mlm_cosine", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm_cosine| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nos1de/hf-distilbert-imdb-mlm-cosine \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-hf_distilbert_imdb_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-15-hf_distilbert_imdb_mlm_en.md new file mode 100644 index 00000000000000..598fef3dfcef95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-hf_distilbert_imdb_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hf_distilbert_imdb_mlm DistilBertEmbeddings from nos1de +author: John Snow Labs +name: hf_distilbert_imdb_mlm +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hf_distilbert_imdb_mlm` is an English model originally trained by nos1de. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_en_5.1.2_3.0_1694791148509.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hf_distilbert_imdb_mlm_en_5.1.2_3.0_1694791148509.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("hf_distilbert_imdb_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("hf_distilbert_imdb_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hf_distilbert_imdb_mlm| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/nos1de/hf-distilbert-imdb-mlm \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-hinglish_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-hinglish_distilbert_en.md new file mode 100644 index 00000000000000..c571c046c03da8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-hinglish_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English hinglish_distilbert DistilBertEmbeddings from meghanabhange +author: John Snow Labs +name: hinglish_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `hinglish_distilbert` is an English model originally trained by meghanabhange. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/hinglish_distilbert_en_5.1.2_3.0_1694782372500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/hinglish_distilbert_en_5.1.2_3.0_1694782372500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("hinglish_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("hinglish_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|hinglish_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/meghanabhange/Hinglish-DistilBert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-indic_transformers_hindi_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-indic_transformers_hindi_distilbert_en.md new file mode 100644 index 00000000000000..ae1d2a7f6cb8bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-indic_transformers_hindi_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English indic_transformers_hindi_distilbert DistilBertEmbeddings from neuralspace +author: John Snow Labs +name: indic_transformers_hindi_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `indic_transformers_hindi_distilbert` is an English model originally trained by neuralspace. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/indic_transformers_hindi_distilbert_en_5.1.2_3.0_1694783192541.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/indic_transformers_hindi_distilbert_en_5.1.2_3.0_1694783192541.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("indic_transformers_hindi_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("indic_transformers_hindi_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|indic_transformers_hindi_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/neuralspace/indic-transformers-hi-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adagrad_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adagrad_en.md new file mode 100644 index 00000000000000..324774f9db35d9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adagrad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_adagrad DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_adagrad +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inisw08_distilbert_mlm_adagrad` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adagrad_en_5.1.2_3.0_1694788039538.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adagrad_en_5.1.2_3.0_1694788039538.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_adagrad","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_adagrad", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_adagrad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-adagrad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_hf_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_hf_en.md new file mode 100644 index 00000000000000..d36fb1f1702e99 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_hf_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_adamw_hf DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_adamw_hf +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inisw08_distilbert_mlm_adamw_hf` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_hf_en_5.1.2_3.0_1694787638535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_hf_en_5.1.2_3.0_1694787638535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_adamw_hf","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_adamw_hf", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_adamw_hf| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-adamw_hf \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_0608_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_0608_en.md new file mode 100644 index 00000000000000..6f7fa1991c81dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_0608_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_adamw_torch_0608 DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_adamw_torch_0608 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inisw08_distilbert_mlm_adamw_torch_0608` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_torch_0608_en_5.1.2_3.0_1694788142265.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_torch_0608_en_5.1.2_3.0_1694788142265.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_adamw_torch_0608","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_adamw_torch_0608", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_adamw_torch_0608| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.7 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-adamw_torch_0608 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_en.md new file mode 100644 index 00000000000000..c93842c9710865 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_adamw_torch DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_adamw_torch +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inisw08_distilbert_mlm_adamw_torch` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_torch_en_5.1.2_3.0_1694787825797.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_torch_en_5.1.2_3.0_1694787825797.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_adamw_torch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_adamw_torch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_adamw_torch| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-adamw_torch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_fused_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_fused_en.md new file mode 100644 index 00000000000000..1063741d4d1b59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_adamw_torch_fused_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_adamw_torch_fused DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_adamw_torch_fused +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `inisw08_distilbert_mlm_adamw_torch_fused` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_torch_fused_en_5.1.2_3.0_1694788244866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_adamw_torch_fused_en_5.1.2_3.0_1694788244866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_adamw_torch_fused","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_adamw_torch_fused", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_adamw_torch_fused| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-adamw_torch_fused \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_lion_32bit_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_lion_32bit_en.md new file mode 100644 index 00000000000000..776b5189ddfcba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_lion_32bit_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_lion_32bit DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_lion_32bit +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inisw08_distilbert_mlm_lion_32bit` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_lion_32bit_en_5.1.2_3.0_1694788344252.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_lion_32bit_en_5.1.2_3.0_1694788344252.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_lion_32bit","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_lion_32bit", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_lion_32bit| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|245.9 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-lion_32bit \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_lion_32bit_test_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_lion_32bit_test_en.md new file mode 100644 index 00000000000000..0672005b36e2aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_lion_32bit_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_lion_32bit_test DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_lion_32bit_test +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inisw08_distilbert_mlm_lion_32bit_test` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_lion_32bit_test_en_5.1.2_3.0_1694788450083.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_lion_32bit_test_en_5.1.2_3.0_1694788450083.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_lion_32bit_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_lion_32bit_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_lion_32bit_test| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|245.9 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-lion_32bit_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_sgd_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_sgd_en.md new file mode 100644 index 00000000000000..c1dee01844e52a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_distilbert_mlm_sgd_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_distilbert_mlm_sgd DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_distilbert_mlm_sgd +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inisw08_distilbert_mlm_sgd` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_sgd_en_5.1.2_3.0_1694787934386.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_distilbert_mlm_sgd_en_5.1.2_3.0_1694787934386.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("inisw08_distilbert_mlm_sgd","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_distilbert_mlm_sgd", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_distilbert_mlm_sgd| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-DistilBERT-mlm-sgd \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-inisw08_robert_mlm_adamw_torch_test_en.md b/docs/_posts/ahmedlone127/2023-09-15-inisw08_robert_mlm_adamw_torch_test_en.md new file mode 100644 index 00000000000000..dfff7490abf030 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-inisw08_robert_mlm_adamw_torch_test_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English inisw08_robert_mlm_adamw_torch_test DistilBertEmbeddings from ugiugi +author: John Snow Labs +name: inisw08_robert_mlm_adamw_torch_test +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`inisw08_robert_mlm_adamw_torch_test` is an English model originally trained by ugiugi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/inisw08_robert_mlm_adamw_torch_test_en_5.1.2_3.0_1694789322009.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/inisw08_robert_mlm_adamw_torch_test_en_5.1.2_3.0_1694789322009.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("inisw08_robert_mlm_adamw_torch_test","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("inisw08_robert_mlm_adamw_torch_test", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|inisw08_robert_mlm_adamw_torch_test| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/ugiugi/inisw08-RoBERT-mlm-adamw_torch_test \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-javanese_distilbert_small_imdb_jv.md b/docs/_posts/ahmedlone127/2023-09-15-javanese_distilbert_small_imdb_jv.md new file mode 100644 index 00000000000000..7954b2cb6667a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-javanese_distilbert_small_imdb_jv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Javanese javanese_distilbert_small_imdb DistilBertEmbeddings from w11wo +author: John Snow Labs +name: javanese_distilbert_small_imdb +date: 2023-09-15 +tags: [distilbert, jv, open_source, fill_mask, onnx] +task: Embeddings +language: jv +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`javanese_distilbert_small_imdb` is a Javanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/javanese_distilbert_small_imdb_jv_5.1.2_3.0_1694785464605.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/javanese_distilbert_small_imdb_jv_5.1.2_3.0_1694785464605.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("javanese_distilbert_small_imdb","jv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("javanese_distilbert_small_imdb", "jv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|javanese_distilbert_small_imdb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|jv| +|Size:|247.6 MB| + +## References + +https://huggingface.co/w11wo/javanese-distilbert-small-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-javanese_distilbert_small_jv.md b/docs/_posts/ahmedlone127/2023-09-15-javanese_distilbert_small_jv.md new file mode 100644 index 00000000000000..3374867b783e48 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-javanese_distilbert_small_jv.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Javanese javanese_distilbert_small DistilBertEmbeddings from w11wo +author: John Snow Labs +name: javanese_distilbert_small +date: 2023-09-15 +tags: [distilbert, jv, open_source, fill_mask, onnx] +task: Embeddings +language: jv +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`javanese_distilbert_small` is a Javanese model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/javanese_distilbert_small_jv_5.1.2_3.0_1694785574408.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/javanese_distilbert_small_jv_5.1.2_3.0_1694785574408.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("javanese_distilbert_small","jv") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("javanese_distilbert_small", "jv") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|javanese_distilbert_small| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|jv| +|Size:|247.6 MB| + +## References + +https://huggingface.co/w11wo/javanese-distilbert-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_full_corpus_10.0_en.md b/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_full_corpus_10.0_en.md new file mode 100644 index 00000000000000..16a4bef397b6e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_full_corpus_10.0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kaz_legal_distilbert_full_corpus_10.0 DistilBertEmbeddings from kaisar-barlybay-sse +author: John Snow Labs +name: kaz_legal_distilbert_full_corpus_10.0 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kaz_legal_distilbert_full_corpus_10.0` is an English model originally trained by kaisar-barlybay-sse. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kaz_legal_distilbert_full_corpus_10.0_en_5.1.2_3.0_1694781030169.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kaz_legal_distilbert_full_corpus_10.0_en_5.1.2_3.0_1694781030169.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("kaz_legal_distilbert_full_corpus_10.0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("kaz_legal_distilbert_full_corpus_10.0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kaz_legal_distilbert_full_corpus_10.0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/kaisar-barlybay-sse/kaz_legal_distilbert_full_corpus_10.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_full_corpus_3.0_en.md b/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_full_corpus_3.0_en.md new file mode 100644 index 00000000000000..316e5e5bcf3328 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_full_corpus_3.0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kaz_legal_distilbert_full_corpus_3.0 DistilBertEmbeddings from kaisar-barlybay-sse +author: John Snow Labs +name: kaz_legal_distilbert_full_corpus_3.0 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kaz_legal_distilbert_full_corpus_3.0` is an English model originally trained by kaisar-barlybay-sse. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kaz_legal_distilbert_full_corpus_3.0_en_5.1.2_3.0_1694780769803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kaz_legal_distilbert_full_corpus_3.0_en_5.1.2_3.0_1694780769803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("kaz_legal_distilbert_full_corpus_3.0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("kaz_legal_distilbert_full_corpus_3.0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kaz_legal_distilbert_full_corpus_3.0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/kaisar-barlybay-sse/kaz_legal_distilbert_full_corpus_3.0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545_en.md b/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545_en.md new file mode 100644 index 00000000000000..7bd19e82e8bdf7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545 DistilBertEmbeddings from kaisar-barlybay-sse +author: John Snow Labs +name: kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545` is an English model originally trained by kaisar-barlybay-sse.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545_en_5.1.2_3.0_1694782306602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545_en_5.1.2_3.0_1694782306602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/kaisar-barlybay-sse/kaz_legal_distilbert_legal_corpus_312818008_words_4.945454545454545 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-lsg_distilbert_base_uncased_4096_en.md b/docs/_posts/ahmedlone127/2023-09-15-lsg_distilbert_base_uncased_4096_en.md new file mode 100644 index 00000000000000..376789eb00edb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-lsg_distilbert_base_uncased_4096_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lsg_distilbert_base_uncased_4096 DistilBertEmbeddings from ccdv +author: John Snow Labs +name: lsg_distilbert_base_uncased_4096 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`lsg_distilbert_base_uncased_4096` is an English model originally trained by ccdv. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lsg_distilbert_base_uncased_4096_en_5.1.2_3.0_1694786927231.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lsg_distilbert_base_uncased_4096_en_5.1.2_3.0_1694786927231.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("lsg_distilbert_base_uncased_4096","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("lsg_distilbert_base_uncased_4096", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lsg_distilbert_base_uncased_4096| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|257.5 MB| + +## References + +https://huggingface.co/ccdv/lsg-distilbert-base-uncased-4096 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-ltrc_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-ltrc_distilbert_en.md new file mode 100644 index 00000000000000..8437b1fe1fab69 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-ltrc_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ltrc_distilbert DistilBertEmbeddings from ltrctelugu +author: John Snow Labs +name: ltrc_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`ltrc_distilbert` is an English model originally trained by ltrctelugu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ltrc_distilbert_en_5.1.2_3.0_1694781693527.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ltrc_distilbert_en_5.1.2_3.0_1694781693527.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("ltrc_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("ltrc_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ltrc_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.6 MB| + +## References + +https://huggingface.co/ltrctelugu/ltrc-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-m4_mlm_en.md b/docs/_posts/ahmedlone127/2023-09-15-m4_mlm_en.md new file mode 100644 index 00000000000000..a445e59cb23558 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-m4_mlm_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m4_mlm DistilBertEmbeddings from S2312dal +author: John Snow Labs +name: m4_mlm +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m4_mlm` is an English model originally trained by S2312dal. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m4_mlm_en_5.1.2_3.0_1694776951864.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m4_mlm_en_5.1.2_3.0_1694776951864.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("m4_mlm","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("m4_mlm", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m4_mlm| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/S2312dal/M4_MLM \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-malaysian_distilbert_small_ms.md b/docs/_posts/ahmedlone127/2023-09-15-malaysian_distilbert_small_ms.md new file mode 100644 index 00000000000000..4517c842fa1a98 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-malaysian_distilbert_small_ms.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Malay (macrolanguage) malaysian_distilbert_small DistilBertEmbeddings from w11wo +author: John Snow Labs +name: malaysian_distilbert_small +date: 2023-09-15 +tags: [distilbert, ms, open_source, fill_mask, onnx] +task: Embeddings +language: ms +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `malaysian_distilbert_small` is a Malay (macrolanguage) model originally trained by w11wo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/malaysian_distilbert_small_ms_5.1.2_3.0_1694785690565.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/malaysian_distilbert_small_ms_5.1.2_3.0_1694785690565.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("malaysian_distilbert_small","ms") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("malaysian_distilbert_small", "ms") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|malaysian_distilbert_small| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|ms| +|Size:|248.1 MB| + +## References + +https://huggingface.co/w11wo/malaysian-distilbert-small \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-marathi_distilbert_mr.md b/docs/_posts/ahmedlone127/2023-09-15-marathi_distilbert_mr.md new file mode 100644 index 00000000000000..c5a3ed8a1bd254 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-marathi_distilbert_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_distilbert DistilBertEmbeddings from DarshanDeshpande +author: John Snow Labs +name: marathi_distilbert +date: 2023-09-15 +tags: [distilbert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_distilbert` is a Marathi model originally trained by DarshanDeshpande. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_distilbert_mr_5.1.2_3.0_1694771200340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_distilbert_mr_5.1.2_3.0_1694771200340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("marathi_distilbert","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("marathi_distilbert", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|247.5 MB| + +## References + +https://huggingface.co/DarshanDeshpande/marathi-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-marathi_distilbert_pretrained_mr.md b/docs/_posts/ahmedlone127/2023-09-15-marathi_distilbert_pretrained_mr.md new file mode 100644 index 00000000000000..7c83b8dce548c1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-marathi_distilbert_pretrained_mr.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Marathi marathi_distilbert_pretrained DistilBertEmbeddings from boleshirish +author: John Snow Labs +name: marathi_distilbert_pretrained +date: 2023-09-15 +tags: [distilbert, mr, open_source, fill_mask, onnx] +task: Embeddings +language: mr +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `marathi_distilbert_pretrained` is a Marathi model originally trained by boleshirish. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/marathi_distilbert_pretrained_mr_5.1.2_3.0_1694788553079.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/marathi_distilbert_pretrained_mr_5.1.2_3.0_1694788553079.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("marathi_distilbert_pretrained","mr") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("marathi_distilbert_pretrained", "mr") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|marathi_distilbert_pretrained| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|mr| +|Size:|248.7 MB| + +## References + +https://huggingface.co/boleshirish/Marathi_DistilBert_Pretrained \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-masked_language_finetuned_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-masked_language_finetuned_model_en.md new file mode 100644 index 00000000000000..40192a38512713 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-masked_language_finetuned_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_language_finetuned_model DistilBertEmbeddings from benlehrburger +author: John Snow Labs +name: masked_language_finetuned_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_language_finetuned_model` is an English model originally trained by benlehrburger. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_language_finetuned_model_en_5.1.2_3.0_1694786097156.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_language_finetuned_model_en_5.1.2_3.0_1694786097156.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("masked_language_finetuned_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("masked_language_finetuned_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_language_finetuned_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/benlehrburger/masked-language-finetuned-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-masked_language_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-masked_language_model_en.md new file mode 100644 index 00000000000000..adcef53f7f409b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-masked_language_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English masked_language_model DistilBertEmbeddings from ayoolaolafenwa +author: John Snow Labs +name: masked_language_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `masked_language_model` is an English model originally trained by ayoolaolafenwa. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/masked_language_model_en_5.1.2_3.0_1694771656379.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/masked_language_model_en_5.1.2_3.0_1694771656379.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("masked_language_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("masked_language_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|masked_language_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/ayoolaolafenwa/Masked-Language-Model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-mdistilbertv2.1_en.md b/docs/_posts/ahmedlone127/2023-09-15-mdistilbertv2.1_en.md new file mode 100644 index 00000000000000..b23b5c99e69dd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-mdistilbertv2.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mdistilbertv2.1 DistilBertEmbeddings from bongsoo +author: John Snow Labs +name: mdistilbertv2.1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mdistilbertv2.1` is an English model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdistilbertv2.1_en_5.1.2_3.0_1694787507745.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdistilbertv2.1_en_5.1.2_3.0_1694787507745.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("mdistilbertv2.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("mdistilbertv2.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdistilbertv2.1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|602.3 MB| + +## References + +https://huggingface.co/bongsoo/mdistilbertV2.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-mdistilbertv3.1_en.md b/docs/_posts/ahmedlone127/2023-09-15-mdistilbertv3.1_en.md new file mode 100644 index 00000000000000..15d7d294a616f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-mdistilbertv3.1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mdistilbertv3.1 DistilBertEmbeddings from bongsoo +author: John Snow Labs +name: mdistilbertv3.1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mdistilbertv3.1` is an English model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mdistilbertv3.1_en_5.1.2_3.0_1694771042552.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mdistilbertv3.1_en_5.1.2_3.0_1694771042552.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("mdistilbertv3.1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("mdistilbertv3.1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mdistilbertv3.1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|621.1 MB| + +## References + +https://huggingface.co/bongsoo/mdistilbertV3.1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-medicaldistilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-medicaldistilbert_en.md new file mode 100644 index 00000000000000..e6123d11546197 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-medicaldistilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English medicaldistilbert DistilBertEmbeddings from Gaborandi +author: John Snow Labs +name: medicaldistilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `medicaldistilbert` is an English model originally trained by Gaborandi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/medicaldistilbert_en_5.1.2_3.0_1694770810957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/medicaldistilbert_en_5.1.2_3.0_1694770810957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("medicaldistilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("medicaldistilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|medicaldistilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Gaborandi/Medicaldistilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-mlm_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-mlm_distilbert_en.md new file mode 100644 index 00000000000000..ece88dcd338735 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-mlm_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_distilbert DistilBertEmbeddings from dsoum +author: John Snow Labs +name: mlm_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_distilbert` is an English model originally trained by dsoum. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_distilbert_en_5.1.2_3.0_1694787195352.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_distilbert_en_5.1.2_3.0_1694787195352.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("mlm_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("mlm_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/dsoum/mlm-distilbert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-mlm_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-mlm_model_en.md new file mode 100644 index 00000000000000..8a610ed40f2187 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-mlm_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mlm_model DistilBertEmbeddings from amkorba +author: John Snow Labs +name: mlm_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mlm_model` is an English model originally trained by amkorba. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mlm_model_en_5.1.2_3.0_1694772762718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mlm_model_en_5.1.2_3.0_1694772762718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("mlm_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("mlm_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mlm_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/amkorba/mlm-model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-mtl_distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-mtl_distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..4b67a1557223d5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-mtl_distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mtl_distilbert_base_uncased DistilBertEmbeddings from jgammack +author: John Snow Labs +name: mtl_distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `mtl_distilbert_base_uncased` is an English model originally trained by jgammack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mtl_distilbert_base_uncased_en_5.1.2_3.0_1694781088162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mtl_distilbert_base_uncased_en_5.1.2_3.0_1694781088162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("mtl_distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("mtl_distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mtl_distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jgammack/MTL-distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-output_en.md b/docs/_posts/ahmedlone127/2023-09-15-output_en.md new file mode 100644 index 00000000000000..6a86ca14ebdedf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-output_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English output DistilBertEmbeddings from soyisauce +author: John Snow Labs +name: output +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `output` is an English model originally trained by soyisauce. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/output_en_5.1.2_3.0_1694790179928.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/output_en_5.1.2_3.0_1694790179928.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("output","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("output", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|output| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/soyisauce/output \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-predreambert_en.md b/docs/_posts/ahmedlone127/2023-09-15-predreambert_en.md new file mode 100644 index 00000000000000..72253567f3f796 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-predreambert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English predreambert DistilBertEmbeddings from asheads +author: John Snow Labs +name: predreambert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `predreambert` is an English model originally trained by asheads. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/predreambert_en_5.1.2_3.0_1694780049333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/predreambert_en_5.1.2_3.0_1694780049333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("predreambert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("predreambert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|predreambert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/asheads/PredreamBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-pt_distilbert_base_en.md b/docs/_posts/ahmedlone127/2023-09-15-pt_distilbert_base_en.md new file mode 100644 index 00000000000000..c51f4eec9ad63d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-pt_distilbert_base_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English pt_distilbert_base DistilBertEmbeddings from SharedBailii +author: John Snow Labs +name: pt_distilbert_base +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `pt_distilbert_base` is an English model originally trained by SharedBailii. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/pt_distilbert_base_en_5.1.2_3.0_1694782385724.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/pt_distilbert_base_en_5.1.2_3.0_1694782385724.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("pt_distilbert_base","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("pt_distilbert_base", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|pt_distilbert_base| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/SharedBailii/PT-distilbert-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-remote_sensing_distilbert_cased_en.md b/docs/_posts/ahmedlone127/2023-09-15-remote_sensing_distilbert_cased_en.md new file mode 100644 index 00000000000000..0707313250a5a5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-remote_sensing_distilbert_cased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English remote_sensing_distilbert_cased DistilBertEmbeddings from Chramer +author: John Snow Labs +name: remote_sensing_distilbert_cased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `remote_sensing_distilbert_cased` is an English model originally trained by Chramer. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/remote_sensing_distilbert_cased_en_5.1.2_3.0_1694772072848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/remote_sensing_distilbert_cased_en_5.1.2_3.0_1694772072848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("remote_sensing_distilbert_cased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("remote_sensing_distilbert_cased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|remote_sensing_distilbert_cased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|243.7 MB| + +## References + +https://huggingface.co/Chramer/remote-sensing-distilbert-cased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-sae_distilbert_base_uncased_en.md b/docs/_posts/ahmedlone127/2023-09-15-sae_distilbert_base_uncased_en.md new file mode 100644 index 00000000000000..0f3157aeda9ed5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-sae_distilbert_base_uncased_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sae_distilbert_base_uncased DistilBertEmbeddings from jgammack +author: John Snow Labs +name: sae_distilbert_base_uncased +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sae_distilbert_base_uncased` is an English model originally trained by jgammack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sae_distilbert_base_uncased_en_5.1.2_3.0_1694781193775.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sae_distilbert_base_uncased_en_5.1.2_3.0_1694781193775.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("sae_distilbert_base_uncased","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("sae_distilbert_base_uncased", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sae_distilbert_base_uncased| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/jgammack/SAE-distilbert-base-uncased \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-skc_mlm_english_torch_en.md b/docs/_posts/ahmedlone127/2023-09-15-skc_mlm_english_torch_en.md new file mode 100644 index 00000000000000..17177818563154 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-skc_mlm_english_torch_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English skc_mlm_english_torch DistilBertEmbeddings from Tobias +author: John Snow Labs +name: skc_mlm_english_torch +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `skc_mlm_english_torch` is an English model originally trained by Tobias. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/skc_mlm_english_torch_en_5.1.2_3.0_1694782805554.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/skc_mlm_english_torch_en_5.1.2_3.0_1694782805554.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("skc_mlm_english_torch","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("skc_mlm_english_torch", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|skc_mlm_english_torch| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Tobias/skc_MLM_English_torch \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-sparsembed_max_en.md b/docs/_posts/ahmedlone127/2023-09-15-sparsembed_max_en.md new file mode 100644 index 00000000000000..fa8707c7a1768b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-sparsembed_max_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sparsembed_max DistilBertEmbeddings from raphaelsty +author: John Snow Labs +name: sparsembed_max +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `sparsembed_max` is an English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sparsembed_max_en_5.1.2_3.0_1694787581861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sparsembed_max_en_5.1.2_3.0_1694787581861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("sparsembed_max","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("sparsembed_max", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sparsembed_max| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/raphaelsty/sparsembed-max \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-splade_max_en.md b/docs/_posts/ahmedlone127/2023-09-15-splade_max_en.md new file mode 100644 index 00000000000000..1c920ce3fce2db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-splade_max_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_max DistilBertEmbeddings from raphaelsty +author: John Snow Labs +name: splade_max +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_max` is an English model originally trained by raphaelsty. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_max_en_5.1.2_3.0_1694787692631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_max_en_5.1.2_3.0_1694787692631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("splade_max","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("splade_max", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_max| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/raphaelsty/splade-max \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-splade_v2_distil_en.md b/docs/_posts/ahmedlone127/2023-09-15-splade_v2_distil_en.md new file mode 100644 index 00000000000000..43c1b804c422ff --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-splade_v2_distil_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_v2_distil DistilBertEmbeddings from naver +author: John Snow Labs +name: splade_v2_distil +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_v2_distil` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_v2_distil_en_5.1.2_3.0_1694779382367.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_v2_distil_en_5.1.2_3.0_1694779382367.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("splade_v2_distil","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("splade_v2_distil", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_v2_distil| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/naver/splade_v2_distil \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-splade_v2_max_en.md b/docs/_posts/ahmedlone127/2023-09-15-splade_v2_max_en.md new file mode 100644 index 00000000000000..d042ebb8c632d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-splade_v2_max_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English splade_v2_max DistilBertEmbeddings from naver +author: John Snow Labs +name: splade_v2_max +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `splade_v2_max` is an English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/splade_v2_max_en_5.1.2_3.0_1694779276923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/splade_v2_max_en_5.1.2_3.0_1694779276923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("splade_v2_max","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("splade_v2_max", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|splade_v2_max| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.3 MB| + +## References + +https://huggingface.co/naver/splade_v2_max \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-spladex_tt_spanish_en.md b/docs/_posts/ahmedlone127/2023-09-15-spladex_tt_spanish_en.md new file mode 100644 index 00000000000000..31891ab0708eab --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-spladex_tt_spanish_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spladex_tt_spanish DistilBertEmbeddings from JAWCF +author: John Snow Labs +name: spladex_tt_spanish +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spladex_tt_spanish` is an English model originally trained by JAWCF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spladex_tt_spanish_en_5.1.2_3.0_1694789904459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spladex_tt_spanish_en_5.1.2_3.0_1694789904459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("spladex_tt_spanish","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("spladex_tt_spanish", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spladex_tt_spanish| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/JAWCF/spladeX-TT-es \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-spladex_zs_en.md b/docs/_posts/ahmedlone127/2023-09-15-spladex_zs_en.md new file mode 100644 index 00000000000000..e79f094bcad8ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-spladex_zs_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English spladex_zs DistilBertEmbeddings from JAWCF +author: John Snow Labs +name: spladex_zs +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `spladex_zs` is an English model originally trained by JAWCF. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/spladex_zs_en_5.1.2_3.0_1694790065534.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/spladex_zs_en_5.1.2_3.0_1694790065534.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("spladex_zs","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("spladex_zs", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|spladex_zs| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/JAWCF/spladeX-ZS \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-tapt_distillbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-tapt_distillbert_en.md new file mode 100644 index 00000000000000..69937e84661fd4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-tapt_distillbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tapt_distillbert DistilBertEmbeddings from minhdang241 +author: John Snow Labs +name: tapt_distillbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tapt_distillbert` is an English model originally trained by minhdang241. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tapt_distillbert_en_5.1.2_3.0_1694782808771.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tapt_distillbert_en_5.1.2_3.0_1694782808771.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("tapt_distillbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("tapt_distillbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tapt_distillbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/minhdang241/TAPT_distillBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-test_text_en.md b/docs/_posts/ahmedlone127/2023-09-15-test_text_en.md new file mode 100644 index 00000000000000..d2be0e53c78ec8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-test_text_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English test_text DistilBertEmbeddings from joaogante +author: John Snow Labs +name: test_text +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `test_text` is an English model originally trained by joaogante. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/test_text_en_5.1.2_3.0_1694774637956.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/test_text_en_5.1.2_3.0_1694774637956.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("test_text","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("test_text", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|test_text| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/joaogante/test_text \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-tod_distilbert_jnt_v1_en.md b/docs/_posts/ahmedlone127/2023-09-15-tod_distilbert_jnt_v1_en.md new file mode 100644 index 00000000000000..861e565ff3c87f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-tod_distilbert_jnt_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tod_distilbert_jnt_v1 DistilBertEmbeddings from TODBERT +author: John Snow Labs +name: tod_distilbert_jnt_v1 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tod_distilbert_jnt_v1` is an English model originally trained by TODBERT. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tod_distilbert_jnt_v1_en_5.1.2_3.0_1694779640585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tod_distilbert_jnt_v1_en_5.1.2_3.0_1694779640585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("tod_distilbert_jnt_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("tod_distilbert_jnt_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tod_distilbert_jnt_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|246.8 MB| + +## References + +https://huggingface.co/TODBERT/TOD-DistilBERT-JNT-V1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-train_mask_language_model_en.md b/docs/_posts/ahmedlone127/2023-09-15-train_mask_language_model_en.md new file mode 100644 index 00000000000000..f0bff3435fbd59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-train_mask_language_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English train_mask_language_model DistilBertEmbeddings from hubert10 +author: John Snow Labs +name: train_mask_language_model +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `train_mask_language_model` is an English model originally trained by hubert10. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/train_mask_language_model_en_5.1.2_3.0_1694772588866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/train_mask_language_model_en_5.1.2_3.0_1694772588866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("train_mask_language_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("train_mask_language_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|train_mask_language_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/hubert10/train_mask_language_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-ukri_distilbert_en.md b/docs/_posts/ahmedlone127/2023-09-15-ukri_distilbert_en.md new file mode 100644 index 00000000000000..5902d7ce08ac0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-ukri_distilbert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English ukri_distilbert DistilBertEmbeddings from Brawl +author: John Snow Labs +name: ukri_distilbert +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ukri_distilbert` is an English model originally trained by Brawl. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/ukri_distilbert_en_5.1.2_3.0_1694770523162.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/ukri_distilbert_en_5.1.2_3.0_1694770523162.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("ukri_distilbert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("ukri_distilbert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|ukri_distilbert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|247.2 MB| + +## References + +https://huggingface.co/Brawl/UKRI_DistilBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1963_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1963_en.md new file mode 100644 index 00000000000000..e800a194851297 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1963_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1963 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1963 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1963` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1963_en_5.1.2_3.0_1694773487647.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1963_en_5.1.2_3.0_1694773487647.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1963","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1963", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1963| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1963 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1964_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1964_en.md new file mode 100644 index 00000000000000..a38a2a04e59245 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1964_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1964 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1964 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1964` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1964_en_5.1.2_3.0_1694773586947.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1964_en_5.1.2_3.0_1694773586947.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1964","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1964", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1964| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1964 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1965_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1965_en.md new file mode 100644 index 00000000000000..820c72ba116f6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1965_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1965 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1965 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1965` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1965_en_5.1.2_3.0_1694773687356.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1965_en_5.1.2_3.0_1694773687356.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1965","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1965", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1965| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1965 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1966_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1966_en.md new file mode 100644 index 00000000000000..e18fa86cff9a13 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1966_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1966 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1966 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1966` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1966_en_5.1.2_3.0_1694773798546.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1966_en_5.1.2_3.0_1694773798546.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1966","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1966", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1966| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1966 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1967_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1967_en.md new file mode 100644 index 00000000000000..fb9ab2afc4d963 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1967_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1967 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1967 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1967` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1967_en_5.1.2_3.0_1694773929268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1967_en_5.1.2_3.0_1694773929268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1967","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1967", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1967| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1967 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1968_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1968_en.md new file mode 100644 index 00000000000000..60f33b9d3132b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1968_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1968 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1968 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1968` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1968_en_5.1.2_3.0_1694774027847.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1968_en_5.1.2_3.0_1694774027847.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1968","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1968", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1968| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1968 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1969_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1969_en.md new file mode 100644 index 00000000000000..ca4f8ff89ed8d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1969_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1969 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1969 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1969` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1969_en_5.1.2_3.0_1694774142310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1969_en_5.1.2_3.0_1694774142310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1969","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1969", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1969| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1969 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1970_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1970_en.md new file mode 100644 index 00000000000000..5ab2bc2ce8618c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1970_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1970 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1970 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1970` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1970_en_5.1.2_3.0_1694774244194.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1970_en_5.1.2_3.0_1694774244194.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1970","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1970", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1970| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1970 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1971_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1971_en.md new file mode 100644 index 00000000000000..71a3e6bcf0c16b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1971_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1971 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1971 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1971` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1971_en_5.1.2_3.0_1694774357133.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1971_en_5.1.2_3.0_1694774357133.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1971","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1971", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1971| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1971 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1972_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1972_en.md new file mode 100644 index 00000000000000..5f986da2d19b2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1972_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1972 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1972 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1972` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1972_en_5.1.2_3.0_1694774463133.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1972_en_5.1.2_3.0_1694774463133.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1972","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1972", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1972| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1972 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1973_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1973_en.md new file mode 100644 index 00000000000000..f14431f32ff89d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1973_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1973 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1973 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1973` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1973_en_5.1.2_3.0_1694774590358.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1973_en_5.1.2_3.0_1694774590358.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1973","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1973", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1973| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1973 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1974_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1974_en.md new file mode 100644 index 00000000000000..95844ea70c7824 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1974_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1974 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1974 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1974` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1974_en_5.1.2_3.0_1694774693818.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1974_en_5.1.2_3.0_1694774693818.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1974","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1974", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1974| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1974 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1975_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1975_en.md new file mode 100644 index 00000000000000..7f5251a5778c40 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1975_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1975 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1975 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1975` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1975_en_5.1.2_3.0_1694774827264.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1975_en_5.1.2_3.0_1694774827264.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1975","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1975", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1975| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1975 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1976_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1976_en.md new file mode 100644 index 00000000000000..68783c132c6dfe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1976_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1976 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1976 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1976` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1976_en_5.1.2_3.0_1694774929781.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1976_en_5.1.2_3.0_1694774929781.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1976","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1976", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1976| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1976 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1977_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1977_en.md new file mode 100644 index 00000000000000..01808de95bdc50 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1977_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1977 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1977 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1977` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1977_en_5.1.2_3.0_1694775021489.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1977_en_5.1.2_3.0_1694775021489.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1977","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1977", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1977| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1977 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1978_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1978_en.md new file mode 100644 index 00000000000000..8c2d1c452b4aae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1978_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1978 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1978 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1978` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1978_en_5.1.2_3.0_1694775150025.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1978_en_5.1.2_3.0_1694775150025.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1978","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1978", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1978| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1978 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1979_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1979_en.md new file mode 100644 index 00000000000000..5c775e52179fa9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1979_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1979 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1979 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1979` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1979_en_5.1.2_3.0_1694775255167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1979_en_5.1.2_3.0_1694775255167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1979","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1979", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1979| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1979 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1980_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1980_en.md new file mode 100644 index 00000000000000..394924fa6c7408 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1980_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1980 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1980 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1980` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1980_en_5.1.2_3.0_1694775377281.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1980_en_5.1.2_3.0_1694775377281.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1980","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1980", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1980| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1980 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1981_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1981_en.md new file mode 100644 index 00000000000000..2fccf80b9b85f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1981_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1981 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1981 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1981` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1981_en_5.1.2_3.0_1694775483982.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1981_en_5.1.2_3.0_1694775483982.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1981","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1981", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1981| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1981 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1982_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1982_en.md new file mode 100644 index 00000000000000..d27b0f38611287 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1982_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1982 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1982 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1982` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1982_en_5.1.2_3.0_1694775591119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1982_en_5.1.2_3.0_1694775591119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1982","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1982", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1982| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1982 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1983_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1983_en.md new file mode 100644 index 00000000000000..d2641597f55424 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1983_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1983 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1983 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1983` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1983_en_5.1.2_3.0_1694775699603.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1983_en_5.1.2_3.0_1694775699603.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1983","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1983", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1983| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1983 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1984_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1984_en.md new file mode 100644 index 00000000000000..314e351c6b685d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1984_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1984 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1984 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1984` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1984_en_5.1.2_3.0_1694775808317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1984_en_5.1.2_3.0_1694775808317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1984","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1984", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1984| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1984 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1985_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1985_en.md new file mode 100644 index 00000000000000..d99d6f3e6cdd94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1985_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1985 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1985 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1985` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1985_en_5.1.2_3.0_1694775921031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1985_en_5.1.2_3.0_1694775921031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1985","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1985", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1985| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1985 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1986_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1986_en.md new file mode 100644 index 00000000000000..607337889bb71e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1986_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1986 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1986 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1986` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1986_en_5.1.2_3.0_1694776020069.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1986_en_5.1.2_3.0_1694776020069.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1986","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1986", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1986| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1986 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1987_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1987_en.md new file mode 100644 index 00000000000000..dba252f85cddde --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1987_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1987 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1987 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1987` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1987_en_5.1.2_3.0_1694776123514.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1987_en_5.1.2_3.0_1694776123514.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1987","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1987", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1987| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1987 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1988_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1988_en.md new file mode 100644 index 00000000000000..2a33f8a19c4b0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1988_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1988 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1988 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1988` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1988_en_5.1.2_3.0_1694776243529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1988_en_5.1.2_3.0_1694776243529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1988","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1988", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1988| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1988 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1989_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1989_en.md new file mode 100644 index 00000000000000..ffe9c4ab164292 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1989_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1989 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1989 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1989` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1989_en_5.1.2_3.0_1694776373157.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1989_en_5.1.2_3.0_1694776373157.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1989","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1989", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1989| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1989 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1990_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1990_en.md new file mode 100644 index 00000000000000..ea391432fdae25 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1990_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1990 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1990 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1990` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1990_en_5.1.2_3.0_1694776502740.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1990_en_5.1.2_3.0_1694776502740.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1990","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1990", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1990| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1990 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1991_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1991_en.md new file mode 100644 index 00000000000000..a6f9d10d89716b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1991_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1991 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1991 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1991` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1991_en_5.1.2_3.0_1694776614990.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1991_en_5.1.2_3.0_1694776614990.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1991","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1991", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1991| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.7 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1991 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1992_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1992_en.md new file mode 100644 index 00000000000000..591d008d964723 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1992_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1992 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1992 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1992` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1992_en_5.1.2_3.0_1694776717629.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1992_en_5.1.2_3.0_1694776717629.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1992","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1992", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1992| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1992 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1993_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1993_en.md new file mode 100644 index 00000000000000..393ce0853add0d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1993_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1993 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1993 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1993` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1993_en_5.1.2_3.0_1694776835046.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1993_en_5.1.2_3.0_1694776835046.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1993","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1993", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1993| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1993 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1994_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1994_en.md new file mode 100644 index 00000000000000..1568d8f62ac121 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1994_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1994 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1994 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1994` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1994_en_5.1.2_3.0_1694776961741.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1994_en_5.1.2_3.0_1694776961741.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1994","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1994", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1994| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1994 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1995_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1995_en.md new file mode 100644 index 00000000000000..f8405218b93ec2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1995_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1995 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1995 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1995` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1995_en_5.1.2_3.0_1694777092432.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1995_en_5.1.2_3.0_1694777092432.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1995","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1995", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1995| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1995 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1996_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1996_en.md new file mode 100644 index 00000000000000..8b7b3e69ac754a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1996_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1996 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1996 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1996` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1996_en_5.1.2_3.0_1694777218916.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1996_en_5.1.2_3.0_1694777218916.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1996","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1996", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1996| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1996 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1997_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1997_en.md new file mode 100644 index 00000000000000..74e5da87a82a1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1997_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1997 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1997 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1997` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1997_en_5.1.2_3.0_1694777344228.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1997_en_5.1.2_3.0_1694777344228.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1997","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1997", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1997| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1997 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1998_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1998_en.md new file mode 100644 index 00000000000000..180aa19d38bda3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1998_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1998 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1998 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1998` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1998_en_5.1.2_3.0_1694777463917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1998_en_5.1.2_3.0_1694777463917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1998","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1998", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1998| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1998 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1999_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1999_en.md new file mode 100644 index 00000000000000..a72f9d8336cf72 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_1999_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_1999 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_1999 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_1999` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1999_en_5.1.2_3.0_1694777589682.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_1999_en_5.1.2_3.0_1694777589682.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_1999","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_1999", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_1999| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_1999 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2000_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2000_en.md new file mode 100644 index 00000000000000..c19707e701a55a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2000_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2000 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2000 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2000` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2000_en_5.1.2_3.0_1694777690320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2000_en_5.1.2_3.0_1694777690320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2000","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2000", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2000| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.0 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2000 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2001_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2001_en.md new file mode 100644 index 00000000000000..a5e3f7c2ad4ec8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2001_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2001 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2001 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2001` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2001_en_5.1.2_3.0_1694777805767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2001_en_5.1.2_3.0_1694777805767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2001","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2001", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2001| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2001 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2002_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2002_en.md new file mode 100644 index 00000000000000..413ea42da61b12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2002_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2002 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2002 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2002` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2002_en_5.1.2_3.0_1694777907120.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2002_en_5.1.2_3.0_1694777907120.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2002","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2002", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2002| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2002 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2003_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2003_en.md new file mode 100644 index 00000000000000..398c4ecae0fbd1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2003 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2003 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2003` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2003_en_5.1.2_3.0_1694778027263.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2003_en_5.1.2_3.0_1694778027263.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2003| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2004_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2004_en.md new file mode 100644 index 00000000000000..e5a2eabc09dae3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2004_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2004 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2004 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2004` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2004_en_5.1.2_3.0_1694778149385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2004_en_5.1.2_3.0_1694778149385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2004","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2004", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2004| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2004 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2005_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2005_en.md new file mode 100644 index 00000000000000..c51cb72b77b2eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2005_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2005 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2005 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2005` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2005_en_5.1.2_3.0_1694778259096.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2005_en_5.1.2_3.0_1694778259096.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2005","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2005", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2005| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2005 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2006_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2006_en.md new file mode 100644 index 00000000000000..0fed6891c3d96e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2006_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2006 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2006 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2006` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2006_en_5.1.2_3.0_1694778368628.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2006_en_5.1.2_3.0_1694778368628.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2006","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2006", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2006| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2006 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2007_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2007_en.md new file mode 100644 index 00000000000000..c95f16af9274d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2007_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2007 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2007 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2007` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2007_en_5.1.2_3.0_1694778485519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2007_en_5.1.2_3.0_1694778485519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2007","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2007", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2007| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2007 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2008_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2008_en.md new file mode 100644 index 00000000000000..67d6fdd800fa49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2008_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2008 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2008 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2008` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2008_en_5.1.2_3.0_1694778619433.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2008_en_5.1.2_3.0_1694778619433.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2008","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2008", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2008| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2008 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2009_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2009_en.md new file mode 100644 index 00000000000000..8010cbeac8f448 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2009_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2009 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2009 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2009` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2009_en_5.1.2_3.0_1694778757644.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2009_en_5.1.2_3.0_1694778757644.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2009","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2009", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2009| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2009 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2010_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2010_en.md new file mode 100644 index 00000000000000..8cf2ab3b538bcd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2010_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2010 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2010 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2010` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2010_en_5.1.2_3.0_1694778898284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2010_en_5.1.2_3.0_1694778898284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2010","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2010", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2010| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2010 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2011_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2011_en.md new file mode 100644 index 00000000000000..5b1cef748e41a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2011_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2011 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2011 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2011` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2011_en_5.1.2_3.0_1694779011031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2011_en_5.1.2_3.0_1694779011031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2011","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2011", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2011| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2011 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2012_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2012_en.md new file mode 100644 index 00000000000000..a7b5f2e4d2874b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2012_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2012 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2012 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2012` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2012_en_5.1.2_3.0_1694779142430.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2012_en_5.1.2_3.0_1694779142430.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2012","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2012", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2012| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2012 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2013_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2013_en.md new file mode 100644 index 00000000000000..2e8545fe1d7c3a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2013_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2013 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2013 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2013` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2013_en_5.1.2_3.0_1694779243866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2013_en_5.1.2_3.0_1694779243866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2013","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2013", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2013| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2013 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2014_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2014_en.md new file mode 100644 index 00000000000000..adca6b77377c91 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2014_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2014 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2014 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2014` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2014_en_5.1.2_3.0_1694779355175.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2014_en_5.1.2_3.0_1694779355175.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2014","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2014", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2014| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2014 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2015_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2015_en.md new file mode 100644 index 00000000000000..9a0790e9d5b221 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2015_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2015 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2015 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2015` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2015_en_5.1.2_3.0_1694779488434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2015_en_5.1.2_3.0_1694779488434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings = DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2015","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2015", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2015| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2015 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2016_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2016_en.md new file mode 100644 index 00000000000000..a3f53f8b44e445 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2016_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2016 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2016 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2016` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2016_en_5.1.2_3.0_1694779636190.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2016_en_5.1.2_3.0_1694779636190.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2016","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2016", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2016| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|249.1 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2016 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2018_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2018_en.md new file mode 100644 index 00000000000000..6592a4d95a20f9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2018_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2018 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2018 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2018` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2018_en_5.1.2_3.0_1694785478792.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2018_en_5.1.2_3.0_1694785478792.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2018","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2018", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2018| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2018 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2019_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2019_en.md new file mode 100644 index 00000000000000..22cc6b73354023 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2019_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2019 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2019 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2019` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2019_en_5.1.2_3.0_1694785606974.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2019_en_5.1.2_3.0_1694785606974.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2019","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2019", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2019| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.8 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2019 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2020_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2020_en.md new file mode 100644 index 00000000000000..65676e6d915678 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2020_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2020 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2020 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2020` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2020_en_5.1.2_3.0_1694785723737.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2020_en_5.1.2_3.0_1694785723737.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2020","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2020", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2020| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2020 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2021_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2021_en.md new file mode 100644 index 00000000000000..237d768698138d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2021_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2021 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2021 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2021` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2021_en_5.1.2_3.0_1694785831607.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2021_en_5.1.2_3.0_1694785831607.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2021","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2021", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2021| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2021 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2022_en.md b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2022_en.md new file mode 100644 index 00000000000000..2b3a06b24d23cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-we4lkd_aml_distilbert_1921_2022_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English we4lkd_aml_distilbert_1921_2022 DistilBertEmbeddings from matheusvolpon +author: John Snow Labs +name: we4lkd_aml_distilbert_1921_2022 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `we4lkd_aml_distilbert_1921_2022` is an English model originally trained by matheusvolpon. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2022_en_5.1.2_3.0_1694785944436.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/we4lkd_aml_distilbert_1921_2022_en_5.1.2_3.0_1694785944436.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("we4lkd_aml_distilbert_1921_2022","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("we4lkd_aml_distilbert_1921_2022", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|we4lkd_aml_distilbert_1921_2022| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|248.9 MB| + +## References + +https://huggingface.co/matheusvolpon/WE4LKD_AML_distilbert_1921_2022 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-yolochess_mlm_azure_cloud_35_en.md b/docs/_posts/ahmedlone127/2023-09-15-yolochess_mlm_azure_cloud_35_en.md new file mode 100644 index 00000000000000..602f37319c98d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-yolochess_mlm_azure_cloud_35_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English yolochess_mlm_azure_cloud_35 DistilBertEmbeddings from jrahn +author: John Snow Labs +name: yolochess_mlm_azure_cloud_35 +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `yolochess_mlm_azure_cloud_35` is an English model originally trained by jrahn. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yolochess_mlm_azure_cloud_35_en_5.1.2_3.0_1694790564963.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yolochess_mlm_azure_cloud_35_en_5.1.2_3.0_1694790564963.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("yolochess_mlm_azure_cloud_35","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("yolochess_mlm_azure_cloud_35", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yolochess_mlm_azure_cloud_35| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|244.3 MB| + +## References + +https://huggingface.co/jrahn/yolochess_mlm_azure-cloud-35 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-15-zero_shot_cross_lingual_transfer_demo_masked_en.md b/docs/_posts/ahmedlone127/2023-09-15-zero_shot_cross_lingual_transfer_demo_masked_en.md new file mode 100644 index 00000000000000..b36a9314ab6920 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-15-zero_shot_cross_lingual_transfer_demo_masked_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English zero_shot_cross_lingual_transfer_demo_masked DistilBertEmbeddings from zzzotop +author: John Snow Labs +name: zero_shot_cross_lingual_transfer_demo_masked +date: 2023-09-15 +tags: [distilbert, en, open_source, fill_mask, onnx] +task: Embeddings +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: DistilBertEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained DistilBertEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `zero_shot_cross_lingual_transfer_demo_masked` is an English model originally trained by zzzotop. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/zero_shot_cross_lingual_transfer_demo_masked_en_5.1.2_3.0_1694784022746.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/zero_shot_cross_lingual_transfer_demo_masked_en_5.1.2_3.0_1694784022746.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +embeddings =DistilBertEmbeddings.pretrained("zero_shot_cross_lingual_transfer_demo_masked","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val embeddings = DistilBertEmbeddings + .pretrained("zero_shot_cross_lingual_transfer_demo_masked", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|zero_shot_cross_lingual_transfer_demo_masked| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[embeddings]| +|Language:|en| +|Size:|505.4 MB| + +## References + +https://huggingface.co/zzzotop/zero-shot-cross-lingual-transfer-demo-masked \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_climate_claim_related_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_climate_claim_related_en.md new file mode 100644 index 00000000000000..a6212234777de3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_climate_claim_related_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_climate_claim_related AlbertForSequenceClassification from mwong +author: John Snow Labs +name: albert_base_climate_claim_related +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_climate_claim_related` is an English model originally trained by mwong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_climate_claim_related_en_5.1.2_3.0_1695061577335.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_climate_claim_related_en_5.1.2_3.0_1695061577335.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_climate_claim_related","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_climate_claim_related", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_climate_claim_related| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/mwong/albert-base-climate-claim-related \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_fever_claim_related_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_fever_claim_related_en.md new file mode 100644 index 00000000000000..505171c013ff19 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_fever_claim_related_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_fever_claim_related AlbertForSequenceClassification from mwong +author: John Snow Labs +name: albert_base_fever_claim_related +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_fever_claim_related` is an English model originally trained by mwong. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_fever_claim_related_en_5.1.2_3.0_1695068402270.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_fever_claim_related_en_5.1.2_3.0_1695068402270.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_fever_claim_related","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_fever_claim_related", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_fever_claim_related| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/mwong/albert-base-fever-claim-related \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_mnli_en.md new file mode 100644 index 00000000000000..5278d63f1353da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_mnli AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_base_mnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_mnli` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_mnli_en_5.1.2_3.0_1695066395250.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_mnli_en_5.1.2_3.0_1695066395250.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_mnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/tals/albert-base-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_quora_classifier_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_quora_classifier_en.md new file mode 100644 index 00000000000000..c7bc53ba3d193c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_quora_classifier_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_quora_classifier AlbertForSequenceClassification from pertschuk +author: John Snow Labs +name: albert_base_quora_classifier +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_quora_classifier` is an English model originally trained by pertschuk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_quora_classifier_en_5.1.2_3.0_1695065865157.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_quora_classifier_en_5.1.2_3.0_1695065865157.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_quora_classifier","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_quora_classifier", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_quora_classifier| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/pertschuk/albert-base-quora-classifier \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v1_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v1_mnli_en.md new file mode 100644 index 00000000000000..90eeea4f8c5447 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v1_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v1_mnli AlbertForSequenceClassification from prajjwal1 +author: John Snow Labs +name: albert_base_v1_mnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v1_mnli` is an English model originally trained by prajjwal1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v1_mnli_en_5.1.2_3.0_1695066228309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v1_mnli_en_5.1.2_3.0_1695066228309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v1_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v1_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v1_mnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/prajjwal1/albert-base-v1-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v1_prop_16_labeled_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v1_prop_16_labeled_en.md new file mode 100644 index 00000000000000..ac660de2078629 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v1_prop_16_labeled_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v1_prop_16_labeled AlbertForSequenceClassification from kellyz +author: John Snow Labs +name: albert_base_v1_prop_16_labeled +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v1_prop_16_labeled` is an English model originally trained by kellyz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v1_prop_16_labeled_en_5.1.2_3.0_1695062747273.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v1_prop_16_labeled_en_5.1.2_3.0_1695062747273.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v1_prop_16_labeled","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v1_prop_16_labeled", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v1_prop_16_labeled| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/kellyz/albert-base-v1-prop-16-labeled \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_ag_news_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_ag_news_en.md new file mode 100644 index 00000000000000..a3630af6aa4e34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_ag_news_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_ag_news AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_ag_news +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_ag_news` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_ag_news_en_5.1.2_3.0_1695067426273.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_ag_news_en_5.1.2_3.0_1695067426273.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_ag_news","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_ag_news", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_ag_news| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-ag-news \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_filtered_0609_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_filtered_0609_en.md new file mode 100644 index 00000000000000..708b2f022eb8dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_filtered_0609_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_finetuned_filtered_0609 AlbertForSequenceClassification from YeRyeongLee +author: John Snow Labs +name: albert_base_v2_finetuned_filtered_0609 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_finetuned_filtered_0609` is an English model originally trained by YeRyeongLee. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_filtered_0609_en_5.1.2_3.0_1695061973284.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_filtered_0609_en_5.1.2_3.0_1695061973284.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_finetuned_filtered_0609","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_finetuned_filtered_0609", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_finetuned_filtered_0609| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/YeRyeongLee/albert-base-v2-finetuned-filtered-0609 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_mrpc_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_mrpc_en.md new file mode 100644 index 00000000000000..2364320eee458f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_mrpc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_finetuned_mrpc AlbertForSequenceClassification from VitaliiVrublevskyi +author: John Snow Labs +name: albert_base_v2_finetuned_mrpc +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_finetuned_mrpc` is an English model originally trained by VitaliiVrublevskyi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_mrpc_en_5.1.2_3.0_1695069182875.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_mrpc_en_5.1.2_3.0_1695069182875.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_finetuned_mrpc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_finetuned_mrpc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_finetuned_mrpc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/VitaliiVrublevskyi/albert-base-v2-finetuned-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_tweets_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_tweets_en.md new file mode 100644 index 00000000000000..66bd3c8cf19e7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_finetuned_tweets_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_finetuned_tweets AlbertForSequenceClassification from minimax123 +author: John Snow Labs +name: albert_base_v2_finetuned_tweets +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_finetuned_tweets` is an English model originally trained by minimax123. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_tweets_en_5.1.2_3.0_1695063210980.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_tweets_en_5.1.2_3.0_1695063210980.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_finetuned_tweets","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_finetuned_tweets", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_finetuned_tweets| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/minimax123/albert-base-v2-finetuned-tweets \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_def_v1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_def_v1_en.md new file mode 100644 index 00000000000000..9bc9e5d70541fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_def_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_hoax_classifier_def_v1 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_hoax_classifier_def_v1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_hoax_classifier_def_v1` is an English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_hoax_classifier_def_v1_en_5.1.2_3.0_1695068148950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_hoax_classifier_def_v1_en_5.1.2_3.0_1695068148950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_hoax_classifier_def_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_hoax_classifier_def_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_hoax_classifier_def_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_hoax_classifier_def_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_fulltext_v1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_fulltext_v1_en.md new file mode 100644 index 00000000000000..ce942473420558 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_fulltext_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_hoax_classifier_fulltext_v1 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_hoax_classifier_fulltext_v1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_hoax_classifier_fulltext_v1` is an English model originally trained by research-dump.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_hoax_classifier_fulltext_v1_en_5.1.2_3.0_1695068548526.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_hoax_classifier_fulltext_v1_en_5.1.2_3.0_1695068548526.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_hoax_classifier_fulltext_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_hoax_classifier_fulltext_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_hoax_classifier_fulltext_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_hoax_classifier_fulltext_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_v1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_v1_en.md new file mode 100644 index 00000000000000..74f6a6013e89b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_hoax_classifier_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_hoax_classifier_v1 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_base_v2_hoax_classifier_v1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_hoax_classifier_v1` is an English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_hoax_classifier_v1_en_5.1.2_3.0_1695067713824.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_hoax_classifier_v1_en_5.1.2_3.0_1695067713824.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_hoax_classifier_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_hoax_classifier_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_hoax_classifier_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/research-dump/albert-base-v2_hoax_classifier_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_imdb_textattack_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_imdb_textattack_en.md new file mode 100644 index 00000000000000..4dd408aec3a812 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_imdb_textattack_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_imdb_textattack AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_imdb_textattack +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_imdb_textattack` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_imdb_textattack_en_5.1.2_3.0_1695067513920.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_imdb_textattack_en_5.1.2_3.0_1695067513920.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_imdb_textattack","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_imdb_textattack", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_imdb_textattack| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-imdb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_jomart_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_jomart_en.md new file mode 100644 index 00000000000000..e72ab43d003cfa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_jomart_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_jomart AlbertForSequenceClassification from JoMart +author: John Snow Labs +name: albert_base_v2_jomart +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_jomart` is an English model originally trained by JoMart. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_jomart_en_5.1.2_3.0_1695061760946.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_jomart_en_5.1.2_3.0_1695061760946.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_jomart","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_jomart", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_jomart| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/JoMart/albert-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mbti_classification_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mbti_classification_en.md new file mode 100644 index 00000000000000..b1bf2342231e4d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mbti_classification_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_mbti_classification AlbertForSequenceClassification from JanSt +author: John Snow Labs +name: albert_base_v2_mbti_classification +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_mbti_classification` is an English model originally trained by JanSt. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_mbti_classification_en_5.1.2_3.0_1695067863364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_mbti_classification_en_5.1.2_3.0_1695067863364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_mbti_classification","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_mbti_classification", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_mbti_classification| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/JanSt/albert-base-v2_mbti-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_alireza1044_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_alireza1044_en.md new file mode 100644 index 00000000000000..8b5a92aa26cf2a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_alireza1044_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_mnli_alireza1044 AlbertForSequenceClassification from Alireza1044 +author: John Snow Labs +name: albert_base_v2_mnli_alireza1044 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_mnli_alireza1044` is an English model originally trained by Alireza1044. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_mnli_alireza1044_en_5.1.2_3.0_1695061648302.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_mnli_alireza1044_en_5.1.2_3.0_1695061648302.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_mnli_alireza1044","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_mnli_alireza1044", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_mnli_alireza1044| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Alireza1044/albert-base-v2-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_prajjwal1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_prajjwal1_en.md new file mode 100644 index 00000000000000..f53a0ecc6afe12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_prajjwal1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_mnli_prajjwal1 AlbertForSequenceClassification from prajjwal1 +author: John Snow Labs +name: albert_base_v2_mnli_prajjwal1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_mnli_prajjwal1` is an English model originally trained by prajjwal1. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_mnli_prajjwal1_en_5.1.2_3.0_1695066316371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_mnli_prajjwal1_en_5.1.2_3.0_1695066316371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_mnli_prajjwal1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_mnli_prajjwal1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_mnli_prajjwal1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/prajjwal1/albert-base-v2-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_tehrannlp_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_tehrannlp_en.md new file mode 100644 index 00000000000000..c5fd01aa5c62b4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_mnli_tehrannlp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_mnli_tehrannlp AlbertForSequenceClassification from TehranNLP +author: John Snow Labs +name: albert_base_v2_mnli_tehrannlp +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_mnli_tehrannlp` is an English model originally trained by TehranNLP. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_mnli_tehrannlp_en_5.1.2_3.0_1695063412933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_mnli_tehrannlp_en_5.1.2_3.0_1695063412933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_mnli_tehrannlp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_mnli_tehrannlp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_mnli_tehrannlp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/TehranNLP/albert-base-v2-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_offenseval2019_downsample_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_offenseval2019_downsample_en.md new file mode 100644 index 00000000000000..e8239559ae34aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_offenseval2019_downsample_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_offenseval2019_downsample AlbertForSequenceClassification from mohsenfayyaz +author: John Snow Labs +name: albert_base_v2_offenseval2019_downsample +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_offenseval2019_downsample` is an English model originally trained by mohsenfayyaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_offenseval2019_downsample_en_5.1.2_3.0_1695065586767.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_offenseval2019_downsample_en_5.1.2_3.0_1695065586767.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_offenseval2019_downsample","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_offenseval2019_downsample", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_offenseval2019_downsample| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/mohsenfayyaz/albert-base-v2-offenseval2019-downsample \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_pub_section_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_pub_section_en.md new file mode 100644 index 00000000000000..f49e48c3917cd0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_pub_section_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_pub_section AlbertForSequenceClassification from ml4pubmed +author: John Snow Labs +name: albert_base_v2_pub_section +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_pub_section` is an English model originally trained by ml4pubmed. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_pub_section_en_5.1.2_3.0_1695061672080.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_pub_section_en_5.1.2_3.0_1695061672080.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_pub_section","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_pub_section", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_pub_section| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|25.1 MB| + +## References + +https://huggingface.co/ml4pubmed/albert-base-v2_pub_section \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_qnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_qnli_en.md new file mode 100644 index 00000000000000..63d14a145614d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_qnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_qnli AlbertForSequenceClassification from Alireza1044 +author: John Snow Labs +name: albert_base_v2_qnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_qnli` is an English model originally trained by Alireza1044. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_qnli_en_5.1.2_3.0_1695061776604.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_qnli_en_5.1.2_3.0_1695061776604.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_qnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_qnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_qnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Alireza1044/albert-base-v2-qnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_qqp_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_qqp_en.md new file mode 100644 index 00000000000000..7922dd26527313 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_qqp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_qqp AlbertForSequenceClassification from Alireza1044 +author: John Snow Labs +name: albert_base_v2_qqp +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_qqp` is an English model originally trained by Alireza1044. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_qqp_en_5.1.2_3.0_1695061839213.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_qqp_en_5.1.2_3.0_1695061839213.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_qqp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_qqp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_qqp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Alireza1044/albert-base-v2-qqp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_rotten_tomatoes_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_rotten_tomatoes_en.md new file mode 100644 index 00000000000000..73b5e05dfda613 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_rotten_tomatoes_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_rotten_tomatoes AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_rotten_tomatoes +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_rotten_tomatoes` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_rotten_tomatoes_en_5.1.2_3.0_1695067594715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_rotten_tomatoes_en_5.1.2_3.0_1695067594715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_rotten_tomatoes","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_rotten_tomatoes", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_rotten_tomatoes| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-rotten-tomatoes \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_snli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_snli_en.md new file mode 100644 index 00000000000000..b81362e9aac630 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_snli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_snli AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_snli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_snli` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_snli_en_5.1.2_3.0_1695067672178.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_snli_en_5.1.2_3.0_1695067672178.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_snli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_snli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_snli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-snli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_sst_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_sst_2_en.md new file mode 100644 index 00000000000000..a73efaa1d2ff3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_sst_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_sst_2 AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_sst_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_sst_2` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_sst_2_en_5.1.2_3.0_1695067258811.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_sst_2_en_5.1.2_3.0_1695067258811.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_sst_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_sst_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_sst_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-SST-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_sts_b_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_sts_b_en.md new file mode 100644 index 00000000000000..861685872206fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_sts_b_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_sts_b AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_sts_b +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_sts_b` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_sts_b_en_5.1.2_3.0_1695067330382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_sts_b_en_5.1.2_3.0_1695067330382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_sts_b","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_sts_b", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_sts_b| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-STS-B \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_toxicity_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_toxicity_en.md new file mode 100644 index 00000000000000..a532f37e60c9d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_toxicity_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_toxicity AlbertForSequenceClassification from mohsenfayyaz +author: John Snow Labs +name: albert_base_v2_toxicity +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_toxicity` is an English model originally trained by mohsenfayyaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_toxicity_en_5.1.2_3.0_1695065658829.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_toxicity_en_5.1.2_3.0_1695065658829.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_toxicity","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_toxicity", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_toxicity| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/mohsenfayyaz/albert-base-v2-toxicity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_tweet_about_disaster_or_not_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_tweet_about_disaster_or_not_en.md new file mode 100644 index 00000000000000..7ce224b039e5b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_tweet_about_disaster_or_not_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_tweet_about_disaster_or_not AlbertForSequenceClassification from DunnBC22 +author: John Snow Labs +name: albert_base_v2_tweet_about_disaster_or_not +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_tweet_about_disaster_or_not` is an English model originally trained by DunnBC22. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_tweet_about_disaster_or_not_en_5.1.2_3.0_1695067141950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_tweet_about_disaster_or_not_en_5.1.2_3.0_1695067141950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_tweet_about_disaster_or_not","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_tweet_about_disaster_or_not", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_tweet_about_disaster_or_not| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/DunnBC22/albert-base-v2-Tweet_About_Disaster_Or_Not \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_wnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_wnli_en.md new file mode 100644 index 00000000000000..7ff34fca41aca3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_wnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_wnli AlbertForSequenceClassification from Alireza1044 +author: John Snow Labs +name: albert_base_v2_wnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_wnli` is an English model originally trained by Alireza1044. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_wnli_en_5.1.2_3.0_1695062146368.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_wnli_en_5.1.2_3.0_1695062146368.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_wnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_wnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_wnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Alireza1044/albert-base-v2-wnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_yelp_polarity_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_yelp_polarity_en.md new file mode 100644 index 00000000000000..47822a118654d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_v2_yelp_polarity_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_yelp_polarity AlbertForSequenceClassification from textattack +author: John Snow Labs +name: albert_base_v2_yelp_polarity +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_yelp_polarity` is an English model originally trained by textattack. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_yelp_polarity_en_5.1.2_3.0_1695067750262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_yelp_polarity_en_5.1.2_3.0_1695067750262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_v2_yelp_polarity","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_v2_yelp_polarity", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_yelp_polarity| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/textattack/albert-base-v2-yelp-polarity \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_en.md new file mode 100644 index 00000000000000..085b308008be09 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_vitaminc AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_base_vitaminc +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_vitaminc` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_en_5.1.2_3.0_1695066643732.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_en_5.1.2_3.0_1695066643732.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_vitaminc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_vitaminc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_vitaminc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/tals/albert-base-vitaminc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_fever_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_fever_en.md new file mode 100644 index 00000000000000..995f314cc2a6ac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_fever_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_vitaminc_fever AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_base_vitaminc_fever +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_vitaminc_fever` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_fever_en_5.1.2_3.0_1695066483165.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_fever_en_5.1.2_3.0_1695066483165.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_vitaminc_fever","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_vitaminc_fever", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_vitaminc_fever| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/tals/albert-base-vitaminc-fever \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_flagging_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_flagging_en.md new file mode 100644 index 00000000000000..f74aecf4d3aa1c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_flagging_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_vitaminc_flagging AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_base_vitaminc_flagging +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_vitaminc_flagging` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_flagging_en_5.1.2_3.0_1695066739098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_flagging_en_5.1.2_3.0_1695066739098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_vitaminc_flagging","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_vitaminc_flagging", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_vitaminc_flagging| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/tals/albert-base-vitaminc_flagging \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_mnli_en.md new file mode 100644 index 00000000000000..63b6b3099ca3bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_vitaminc_mnli AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_base_vitaminc_mnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_vitaminc_mnli` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_mnli_en_5.1.2_3.0_1695066560291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_mnli_en_5.1.2_3.0_1695066560291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_vitaminc_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_vitaminc_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_vitaminc_mnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/tals/albert-base-vitaminc-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_wnei_fever_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_wnei_fever_en.md new file mode 100644 index 00000000000000..eb49e55fe2fd70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_base_vitaminc_wnei_fever_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_vitaminc_wnei_fever AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_base_vitaminc_wnei_fever +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_vitaminc_wnei_fever` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_wnei_fever_en_5.1.2_3.0_1695066829743.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_vitaminc_wnei_fever_en_5.1.2_3.0_1695066829743.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_base_vitaminc_wnei_fever","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_base_vitaminc_wnei_fever", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_vitaminc_wnei_fever| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/tals/albert-base-vitaminc_wnei-fever \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_dnd_intents_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_dnd_intents_en.md new file mode 100644 index 00000000000000..ed0c8f541ef5dc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_dnd_intents_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_dnd_intents AlbertForSequenceClassification from neurae +author: John Snow Labs +name: albert_dnd_intents +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_dnd_intents` is an English model originally trained by neurae. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_dnd_intents_en_5.1.2_3.0_1695065350093.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_dnd_intents_en_5.1.2_3.0_1695065350093.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_dnd_intents","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_dnd_intents", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_dnd_intents| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/neurae/albert-dnd-intents \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_for_math_arabic_base_ft_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_for_math_arabic_base_ft_en.md new file mode 100644 index 00000000000000..307744326961ae --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_for_math_arabic_base_ft_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_for_math_arabic_base_ft AlbertForSequenceClassification from AnReu +author: John Snow Labs +name: albert_for_math_arabic_base_ft +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_for_math_arabic_base_ft` is an English model originally trained by AnReu. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_for_math_arabic_base_ft_en_5.1.2_3.0_1695068267186.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_for_math_arabic_base_ft_en_5.1.2_3.0_1695068267186.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_for_math_arabic_base_ft","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_for_math_arabic_base_ft", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_for_math_arabic_base_ft| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/AnReu/albert-for-math-ar-base-ft \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_for_multilabel_sentence_classification_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_for_multilabel_sentence_classification_en.md new file mode 100644 index 00000000000000..8bd4402b2f3e3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_for_multilabel_sentence_classification_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_for_multilabel_sentence_classification AlbertForSequenceClassification from Zamachi +author: John Snow Labs +name: albert_for_multilabel_sentence_classification +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_for_multilabel_sentence_classification` is an English model originally trained by Zamachi. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_for_multilabel_sentence_classification_en_5.1.2_3.0_1695062063382.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_for_multilabel_sentence_classification_en_5.1.2_3.0_1695062063382.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_for_multilabel_sentence_classification","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_for_multilabel_sentence_classification", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_for_multilabel_sentence_classification| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Zamachi/albert-for-multilabel-sentence-classification \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_goodnotes_reddit_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_goodnotes_reddit_en.md new file mode 100644 index 00000000000000..a64b93b23e9df6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_goodnotes_reddit_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_goodnotes_reddit AlbertForSequenceClassification from vionwinnie +author: John Snow Labs +name: albert_goodnotes_reddit +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_goodnotes_reddit` is an English model originally trained by vionwinnie. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_goodnotes_reddit_en_5.1.2_3.0_1695067820028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_goodnotes_reddit_en_5.1.2_3.0_1695067820028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_goodnotes_reddit","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_goodnotes_reddit", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_goodnotes_reddit| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/vionwinnie/albert-goodnotes-reddit \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cls_sst2_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cls_sst2_en.md new file mode 100644 index 00000000000000..cff887daeee16c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cls_sst2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_cls_sst2 AlbertForSequenceClassification from ghatgetanuj +author: John Snow Labs +name: albert_large_v2_cls_sst2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_cls_sst2` is an English model originally trained by ghatgetanuj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_cls_sst2_en_5.1.2_3.0_1695064661881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_cls_sst2_en_5.1.2_3.0_1695064661881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_cls_sst2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_cls_sst2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_cls_sst2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/ghatgetanuj/albert-large-v2_cls_sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cls_subj_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cls_subj_en.md new file mode 100644 index 00000000000000..0d1919fddb32dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cls_subj_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_cls_subj AlbertForSequenceClassification from ghatgetanuj +author: John Snow Labs +name: albert_large_v2_cls_subj +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_cls_subj` is an English model originally trained by ghatgetanuj. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_cls_subj_en_5.1.2_3.0_1695064394094.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_cls_subj_en_5.1.2_3.0_1695064394094.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_cls_subj","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_cls_subj", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_cls_subj| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/ghatgetanuj/albert-large-v2_cls_subj \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cola_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cola_en.md new file mode 100644 index 00000000000000..719b957bc3223e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_cola_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_cola AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_cola +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_cola` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_cola_en_5.1.2_3.0_1695066562573.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_cola_en_5.1.2_3.0_1695066562573.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_cola","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_cola", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_cola| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-cola \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_hoax_classifier_def_v1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_hoax_classifier_def_v1_en.md new file mode 100644 index 00000000000000..3834e314d84f97 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_hoax_classifier_def_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_hoax_classifier_def_v1 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_large_v2_hoax_classifier_def_v1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_hoax_classifier_def_v1` is an English model originally trained by research-dump. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_hoax_classifier_def_v1_en_5.1.2_3.0_1695068224689.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_hoax_classifier_def_v1_en_5.1.2_3.0_1695068224689.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_hoax_classifier_def_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_hoax_classifier_def_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_hoax_classifier_def_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/research-dump/albert-large-v2_hoax_classifier_def_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_mnli_en.md new file mode 100644 index 00000000000000..02af71cabd98a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_mnli AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_mnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_mnli` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_mnli_en_5.1.2_3.0_1695066654869.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_mnli_en_5.1.2_3.0_1695066654869.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_mnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_mrpc_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_mrpc_en.md new file mode 100644 index 00000000000000..48766658afa8c7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_mrpc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_mrpc AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_mrpc +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_mrpc` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_mrpc_en_5.1.2_3.0_1695066472886.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_mrpc_en_5.1.2_3.0_1695066472886.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_mrpc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_mrpc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_mrpc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-mrpc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_qnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_qnli_en.md new file mode 100644 index 00000000000000..9e1b7bb4ba4201 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_qnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_qnli AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_qnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_qnli` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_qnli_en_5.1.2_3.0_1695066763969.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_qnli_en_5.1.2_3.0_1695066763969.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_qnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_qnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_qnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-qnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_qqp_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_qqp_en.md new file mode 100644 index 00000000000000..c3f4f62c4aa2e4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_qqp_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_qqp AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_qqp +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_qqp` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_qqp_en_5.1.2_3.0_1695066850082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_qqp_en_5.1.2_3.0_1695066850082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_qqp","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_qqp", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_qqp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-qqp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_rte_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_rte_en.md new file mode 100644 index 00000000000000..0ac44be6017663 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_rte_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_rte AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_rte +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_rte` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_rte_en_5.1.2_3.0_1695066936099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_rte_en_5.1.2_3.0_1695066936099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_rte","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_rte", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_rte| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-rte \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_sst2_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_sst2_en.md new file mode 100644 index 00000000000000..d8a7921ccca498 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_sst2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_sst2 AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_sst2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_sst2` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_sst2_en_5.1.2_3.0_1695067014193.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_sst2_en_5.1.2_3.0_1695067014193.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_sst2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_sst2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_sst2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-sst2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_stsb_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_stsb_en.md new file mode 100644 index 00000000000000..32364433ef5cc6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_large_v2_stsb_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_stsb AlbertForSequenceClassification from WeightWatcher +author: John Snow Labs +name: albert_large_v2_stsb +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_stsb` is an English model originally trained by WeightWatcher. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_stsb_en_5.1.2_3.0_1695067083135.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_stsb_en_5.1.2_3.0_1695067083135.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_large_v2_stsb","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_large_v2_stsb", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_stsb| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|66.7 MB| + +## References + +https://huggingface.co/WeightWatcher/albert-large-v2-stsb \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_model_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_model_en.md new file mode 100644 index 00000000000000..22aed4749bd8a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_model AlbertForSequenceClassification from fffffly +author: John Snow Labs +name: albert_model +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_model` is an English model originally trained by fffffly. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_model_en_5.1.2_3.0_1695065474100.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_model_en_5.1.2_3.0_1695065474100.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_model","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_model", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/fffffly/albert_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_offensive_lm_tapt_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_offensive_lm_tapt_finetuned_en.md new file mode 100644 index 00000000000000..e723a430573b6d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_offensive_lm_tapt_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_offensive_lm_tapt_finetuned AlbertForSequenceClassification from k4black +author: John Snow Labs +name: albert_offensive_lm_tapt_finetuned +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_offensive_lm_tapt_finetuned` is an English model originally trained by k4black. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_offensive_lm_tapt_finetuned_en_5.1.2_3.0_1695064045496.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_offensive_lm_tapt_finetuned_en_5.1.2_3.0_1695064045496.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_offensive_lm_tapt_finetuned","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_offensive_lm_tapt_finetuned", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_offensive_lm_tapt_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/k4black/albert-offensive-lm-tapt-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_small_kor_cross_encoder_v1_ko.md b/docs/_posts/ahmedlone127/2023-09-18-albert_small_kor_cross_encoder_v1_ko.md new file mode 100644 index 00000000000000..3fe906680d4eb8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_small_kor_cross_encoder_v1_ko.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Korean albert_small_kor_cross_encoder_v1 AlbertForSequenceClassification from bongsoo +author: John Snow Labs +name: albert_small_kor_cross_encoder_v1 +date: 2023-09-18 +tags: [albert, ko, open_source, sequence_classification, onnx] +task: Text Classification +language: ko +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_small_kor_cross_encoder_v1` is a Korean model originally trained by bongsoo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_small_kor_cross_encoder_v1_ko_5.1.2_3.0_1695065204321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_small_kor_cross_encoder_v1_ko_5.1.2_3.0_1695065204321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_small_kor_cross_encoder_v1","ko") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_small_kor_cross_encoder_v1", "ko") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_small_kor_cross_encoder_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|ko| +|Size:|44.0 MB| + +## References + +https://huggingface.co/bongsoo/albert-small-kor-cross-encoder-v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_tiny_spanish_fakenews_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_tiny_spanish_fakenews_en.md new file mode 100644 index 00000000000000..6a00a693140698 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_tiny_spanish_fakenews_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_tiny_spanish_fakenews AlbertForSequenceClassification from natsanchezc +author: John Snow Labs +name: albert_tiny_spanish_fakenews +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_tiny_spanish_fakenews` is an English model originally trained by natsanchezc. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_tiny_spanish_fakenews_en_5.1.2_3.0_1695066230540.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_tiny_spanish_fakenews_en_5.1.2_3.0_1695066230540.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_tiny_spanish_fakenews","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_tiny_spanish_fakenews", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_tiny_spanish_fakenews| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|20.4 MB| + +## References + +https://huggingface.co/natsanchezc/albert-tiny-spanish-fakenews \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_arabic_finetuned_emotion_aetd_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_arabic_finetuned_emotion_aetd_en.md new file mode 100644 index 00000000000000..dbf7d97a6a1387 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_arabic_finetuned_emotion_aetd_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xlarge_arabic_finetuned_emotion_aetd AlbertForSequenceClassification from MahaJar +author: John Snow Labs +name: albert_xlarge_arabic_finetuned_emotion_aetd +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xlarge_arabic_finetuned_emotion_aetd` is an English model originally trained by MahaJar. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_arabic_finetuned_emotion_aetd_en_5.1.2_3.0_1695066084097.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_arabic_finetuned_emotion_aetd_en_5.1.2_3.0_1695066084097.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xlarge_arabic_finetuned_emotion_aetd","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xlarge_arabic_finetuned_emotion_aetd", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_arabic_finetuned_emotion_aetd| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|220.4 MB| + +## References + +https://huggingface.co/MahaJar/albert-xlarge-arabic-finetuned-emotion_AETD \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_en.md new file mode 100644 index 00000000000000..f3511431daa5a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xlarge_vitaminc AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_xlarge_vitaminc +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xlarge_vitaminc` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_vitaminc_en_5.1.2_3.0_1695067188597.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_vitaminc_en_5.1.2_3.0_1695067188597.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xlarge_vitaminc","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xlarge_vitaminc", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_vitaminc| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|220.5 MB| + +## References + +https://huggingface.co/tals/albert-xlarge-vitaminc \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_fever_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_fever_en.md new file mode 100644 index 00000000000000..1a6d168e2cf9ba --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_fever_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xlarge_vitaminc_fever AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_xlarge_vitaminc_fever +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xlarge_vitaminc_fever` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_vitaminc_fever_en_5.1.2_3.0_1695066951128.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_vitaminc_fever_en_5.1.2_3.0_1695066951128.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xlarge_vitaminc_fever","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xlarge_vitaminc_fever", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_vitaminc_fever| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|220.5 MB| + +## References + +https://huggingface.co/tals/albert-xlarge-vitaminc-fever \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_mnli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_mnli_en.md new file mode 100644 index 00000000000000..79ebdfa39f2e7e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xlarge_vitaminc_mnli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xlarge_vitaminc_mnli AlbertForSequenceClassification from tals +author: John Snow Labs +name: albert_xlarge_vitaminc_mnli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xlarge_vitaminc_mnli` is an English model originally trained by tals. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_vitaminc_mnli_en_5.1.2_3.0_1695067066498.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_vitaminc_mnli_en_5.1.2_3.0_1695067066498.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xlarge_vitaminc_mnli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xlarge_vitaminc_mnli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_vitaminc_mnli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|220.5 MB| + +## References + +https://huggingface.co/tals/albert-xlarge-vitaminc-mnli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_hoax_classifier_def_v1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_hoax_classifier_def_v1_en.md new file mode 100644 index 00000000000000..acf8f72aa47050 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_hoax_classifier_def_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xxlarge_v2_hoax_classifier_def_v1 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_xxlarge_v2_hoax_classifier_def_v1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xxlarge_v2_hoax_classifier_def_v1` is an English model originally trained by research-dump. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_hoax_classifier_def_v1_en_5.1.2_3.0_1695068451848.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_hoax_classifier_def_v1_en_5.1.2_3.0_1695068451848.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +# data: a DataFrame with a "text" column +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer().setInputCols(["documents"]).setOutputCol("token") # produces the "token" column the classifier reads + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xxlarge_v2_hoax_classifier_def_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala +// data: a DataFrame with a "text" column +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") // must match the "documents" input of the classifier + +val tokenizer = new Tokenizer().setInputCols(Array("documents")).setOutputCol("token") // produces the "token" column the classifier reads + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xxlarge_v2_hoax_classifier_def_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xxlarge_v2_hoax_classifier_def_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.9 MB| + +## References + +https://huggingface.co/research-dump/albert-xxlarge-v2_hoax_classifier_def_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_hoax_classifier_v1_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_hoax_classifier_v1_en.md new file mode 100644 index 00000000000000..9fd569fcb93dd9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_hoax_classifier_v1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xxlarge_v2_hoax_classifier_v1 AlbertForSequenceClassification from research-dump +author: John Snow Labs +name: albert_xxlarge_v2_hoax_classifier_v1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xxlarge_v2_hoax_classifier_v1` is an English model originally trained by research-dump. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_hoax_classifier_v1_en_5.1.2_3.0_1695067937484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_hoax_classifier_v1_en_5.1.2_3.0_1695067937484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xxlarge_v2_hoax_classifier_v1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xxlarge_v2_hoax_classifier_v1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xxlarge_v2_hoax_classifier_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.9 MB| + +## References + +https://huggingface.co/research-dump/albert-xxlarge-v2_hoax_classifier_v1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli_en.md b/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli_en.md new file mode 100644 index 00000000000000..0182c7a8413685 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli AlbertForSequenceClassification from ynie +author: John Snow Labs +name: albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli` is a English model originally trained by ynie. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli_en_5.1.2_3.0_1695068066328.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli_en_5.1.2_3.0_1695068066328.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xxlarge_v2_snli_mnli_fever_anli_r1_r2_r3_nli| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|833.9 MB| + +## References + +https://huggingface.co/ynie/albert-xxlarge-v2-snli_mnli_fever_anli_R1_R2_R3-nli \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-alberta_fact_checking_en.md b/docs/_posts/ahmedlone127/2023-09-18-alberta_fact_checking_en.md new file mode 100644 index 00000000000000..f248e79332dfee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-alberta_fact_checking_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English alberta_fact_checking AlbertForSequenceClassification from Dzeniks +author: John Snow Labs +name: alberta_fact_checking +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`alberta_fact_checking` is a English model originally trained by Dzeniks. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/alberta_fact_checking_en_5.1.2_3.0_1695065294318.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/alberta_fact_checking_en_5.1.2_3.0_1695065294318.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("alberta_fact_checking","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("alberta_fact_checking", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|alberta_fact_checking| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Dzeniks/alberta_fact_checking \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-autotrain_app_review_train_albert_1314550196_en.md b/docs/_posts/ahmedlone127/2023-09-18-autotrain_app_review_train_albert_1314550196_en.md new file mode 100644 index 00000000000000..329f39cd4ac8ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-autotrain_app_review_train_albert_1314550196_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English autotrain_app_review_train_albert_1314550196 AlbertForSequenceClassification from noob123 +author: John Snow Labs +name: autotrain_app_review_train_albert_1314550196 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_app_review_train_albert_1314550196` is a English model originally trained by noob123. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_app_review_train_albert_1314550196_en_5.1.2_3.0_1695063882846.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_app_review_train_albert_1314550196_en_5.1.2_3.0_1695063882846.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("autotrain_app_review_train_albert_1314550196","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("autotrain_app_review_train_albert_1314550196", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_app_review_train_albert_1314550196| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/noob123/autotrain-app_review_train_albert-1314550196 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-autotrain_security_text_classification_albert_688320769_en.md b/docs/_posts/ahmedlone127/2023-09-18-autotrain_security_text_classification_albert_688320769_en.md new file mode 100644 index 00000000000000..2615a6b3544b24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-autotrain_security_text_classification_albert_688320769_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English autotrain_security_text_classification_albert_688320769 AlbertForSequenceClassification from vlsb +author: John Snow Labs +name: autotrain_security_text_classification_albert_688320769 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`autotrain_security_text_classification_albert_688320769` is a English model originally trained by vlsb. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/autotrain_security_text_classification_albert_688320769_en_5.1.2_3.0_1695068193893.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/autotrain_security_text_classification_albert_688320769_en_5.1.2_3.0_1695068193893.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("autotrain_security_text_classification_albert_688320769","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("autotrain_security_text_classification_albert_688320769", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|autotrain_security_text_classification_albert_688320769| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/vlsb/autotrain-security-text-classification-albert-688320769 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-edos_2023_baseline_albert_base_v2_label_vector_en.md b/docs/_posts/ahmedlone127/2023-09-18-edos_2023_baseline_albert_base_v2_label_vector_en.md new file mode 100644 index 00000000000000..5fe6d2003705ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-edos_2023_baseline_albert_base_v2_label_vector_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English edos_2023_baseline_albert_base_v2_label_vector AlbertForSequenceClassification from lct-rug-2022 +author: John Snow Labs +name: edos_2023_baseline_albert_base_v2_label_vector +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`edos_2023_baseline_albert_base_v2_label_vector` is a English model originally trained by lct-rug-2022. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/edos_2023_baseline_albert_base_v2_label_vector_en_5.1.2_3.0_1695064267131.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/edos_2023_baseline_albert_base_v2_label_vector_en_5.1.2_3.0_1695064267131.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("edos_2023_baseline_albert_base_v2_label_vector","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("edos_2023_baseline_albert_base_v2_label_vector", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|edos_2023_baseline_albert_base_v2_label_vector| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|28.4 MB| + +## References + +https://huggingface.co/lct-rug-2022/edos-2023-baseline-albert-base-v2-label_vector \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-finetuned_albert_base_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-finetuned_albert_base_v2_en.md new file mode 100644 index 00000000000000..1bcbb2abc9afb2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-finetuned_albert_base_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English finetuned_albert_base_v2 AlbertForSequenceClassification from Queensly +author: John Snow Labs +name: finetuned_albert_base_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`finetuned_albert_base_v2` is a English model originally trained by Queensly. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/finetuned_albert_base_v2_en_5.1.2_3.0_1695063297167.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/finetuned_albert_base_v2_en_5.1.2_3.0_1695063297167.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("finetuned_albert_base_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("finetuned_albert_base_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|finetuned_albert_base_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/Queensly/finetuned_albert_base_v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v2_en.md new file mode 100644 index 00000000000000..5f51bcbdc270ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_amcd_add_v2` is a English model originally trained by m3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v2_en_5.1.2_3.0_1695062951241.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v2_en_5.1.2_3.0_1695062951241.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v3_en.md new file mode 100644 index 00000000000000..c7fb3a0de3c6b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_amcd_add_v3` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v3_en_5.1.2_3.0_1695063387122.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v3_en_5.1.2_3.0_1695063387122.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v3_greedy_en.md new file mode 100644 index 00000000000000..1df16c5be025b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_amcd_add_v3_greedy` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v3_greedy_en_5.1.2_3.0_1695066398520.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v3_greedy_en_5.1.2_3.0_1695066398520.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v4_en.md new file mode 100644 index 00000000000000..4502399a39199e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_add_v4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_add_v4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_amcd_add_v4` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v4_en_5.1.2_3.0_1695063060870.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v4_en_5.1.2_3.0_1695063060870.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_add_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_add_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_add_v4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-add-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v5_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v5_en.md new file mode 100644 index 00000000000000..d6fbba35b24085 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_add_v5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_add_v5 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_add_v5 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_amcd_add_v5` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v5_en_5.1.2_3.0_1695063482602.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_add_v5_en_5.1.2_3.0_1695063482602.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_add_v5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_add_v5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_add_v5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-add-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_back_translation_en.md new file mode 100644 index 00000000000000..2beca52fe818eb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_back_translation` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_back_translation_en_5.1.2_3.0_1695064863291.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_back_translation_en_5.1.2_3.0_1695064863291.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_0_en.md new file mode 100644 index 00000000000000..17a38c895ba624 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_eda_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_0_en_5.1.2_3.0_1695064112411.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_0_en_5.1.2_3.0_1695064112411.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_1_en.md new file mode 100644 index 00000000000000..587e5a67c6dd10 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_eda_1` is an English model originally trained by m3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_1_en_5.1.2_3.0_1695064189599.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_1_en_5.1.2_3.0_1695064189599.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_2_en.md new file mode 100644 index 00000000000000..747b83ac4611d2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_eda_2` is an English model originally trained by m3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_2_en_5.1.2_3.0_1695064273251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_2_en_5.1.2_3.0_1695064273251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_3_en.md new file mode 100644 index 00000000000000..0cec80db71bc85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_eda_3` is an English model originally trained by m3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_3_en_5.1.2_3.0_1695064359403.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_3_en_5.1.2_3.0_1695064359403.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_4_en.md new file mode 100644 index 00000000000000..1740a9aedc3641 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_eda_4` is an English model originally trained by m3. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_4_en_5.1.2_3.0_1695064430928.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_eda_4_en_5.1.2_3.0_1695064430928.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_vanilla_en.md new file mode 100644 index 00000000000000..bb5c45bc9b13f0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_vanilla` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_vanilla_en_5.1.2_3.0_1695067142580.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_vanilla_en_5.1.2_3.0_1695067142580.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..833ae6622534b1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0_en_5.1.2_3.0_1695063121298.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0_en_5.1.2_3.0_1695063121298.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..7af2303453ef59 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1_en_5.1.2_3.0_1695063314340.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1_en_5.1.2_3.0_1695063314340.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..1e7f4dd1d48155 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2_en_5.1.2_3.0_1695063475776.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2_en_5.1.2_3.0_1695063475776.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..69d503027fed20 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3_en_5.1.2_3.0_1695063663201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3_en_5.1.2_3.0_1695063663201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..a55f8468d1ff12 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4_en_5.1.2_3.0_1695063826582.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4_en_5.1.2_3.0_1695063826582.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..2d182d51f3eeb5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_random_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_0_en_5.1.2_3.0_1695065909533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_0_en_5.1.2_3.0_1695065909533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..1f5141396ebd0f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_random_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_1_en_5.1.2_3.0_1695066252422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_1_en_5.1.2_3.0_1695066252422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..2382f8e817eb35 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_random_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_2_en_5.1.2_3.0_1695066408329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_2_en_5.1.2_3.0_1695066408329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..4bfb0d9538f2a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_3_en_5.1.2_3.0_1695066574474.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_3_en_5.1.2_3.0_1695066574474.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..21d501e4d559d1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_4_en_5.1.2_3.0_1695062699768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_random_4_en_5.1.2_3.0_1695062699768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..76b59faded6777 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0_en_5.1.2_3.0_1695065844297.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0_en_5.1.2_3.0_1695065844297.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..84146381888d65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1_en_5.1.2_3.0_1695066182334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1_en_5.1.2_3.0_1695066182334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..53666c6ca26306 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2_en_5.1.2_3.0_1695066326686.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2_en_5.1.2_3.0_1695066326686.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..d90d74586106ce --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3_en_5.1.2_3.0_1695066489127.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3_en_5.1.2_3.0_1695066489127.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..fdee1129e28b7f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4_en_5.1.2_3.0_1695062991788.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4_en_5.1.2_3.0_1695062991788.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_amcd_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-amcd-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v2_en.md new file mode 100644 index 00000000000000..11a08dbeb2d352 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_add_v2_en_5.1.2_3.0_1695063091310.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_add_v2_en_5.1.2_3.0_1695063091310.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v3_en.md new file mode 100644 index 00000000000000..3f834190e0ad85 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_add_v3_en_5.1.2_3.0_1695063560807.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_add_v3_en_5.1.2_3.0_1695063560807.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v3_greedy_en.md new file mode 100644 index 00000000000000..4d58b42f3024b8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_add_v3_greedy` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_add_v3_greedy_en_5.1.2_3.0_1695066486815.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_add_v3_greedy_en_5.1.2_3.0_1695066486815.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_back_translation_en.md new file mode 100644 index 00000000000000..97df6d059169b7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_back_translation` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_back_translation_en_5.1.2_3.0_1695064948971.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_back_translation_en_5.1.2_3.0_1695064948971.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_0_en.md new file mode 100644 index 00000000000000..91ace9c97b9bbb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_eda_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_0_en_5.1.2_3.0_1695068902480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_0_en_5.1.2_3.0_1695068902480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_1_en.md new file mode 100644 index 00000000000000..1755cda356aead --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_eda_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_1_en_5.1.2_3.0_1695069027405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_1_en_5.1.2_3.0_1695069027405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_2_en.md new file mode 100644 index 00000000000000..3ec9a538dc6161 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_eda_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_2_en_5.1.2_3.0_1695069246806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_2_en_5.1.2_3.0_1695069246806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_3_en.md new file mode 100644 index 00000000000000..21122daf0d4be6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_eda_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_3_en_5.1.2_3.0_1695069360213.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_3_en_5.1.2_3.0_1695069360213.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_4_en.md new file mode 100644 index 00000000000000..b06c6214d1afa0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_eda_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_4_en_5.1.2_3.0_1695069414782.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_eda_4_en_5.1.2_3.0_1695069414782.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_vanilla_en.md new file mode 100644 index 00000000000000..76319a7f984c60 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_vanilla` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_vanilla_en_5.1.2_3.0_1695067318954.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_vanilla_en_5.1.2_3.0_1695067318954.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..7ef60552100b65 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0_en_5.1.2_3.0_1695063897321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0_en_5.1.2_3.0_1695063897321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..6fc67cbad9356f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1_en_5.1.2_3.0_1695064057062.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1_en_5.1.2_3.0_1695064057062.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..cf2f6d37fa29cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2_en_5.1.2_3.0_1695064200809.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2_en_5.1.2_3.0_1695064200809.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..fd562b86add36f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3_en_5.1.2_3.0_1695064370410.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3_en_5.1.2_3.0_1695064370410.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..a798480ff00953 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4_en_5.1.2_3.0_1695064462661.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4_en_5.1.2_3.0_1695064462661.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..7cec0ef9e29d0b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_random_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_random_0_en_5.1.2_3.0_1695066662585.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_random_0_en_5.1.2_3.0_1695066662585.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..76c3ec735254ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_random_3_en_5.1.2_3.0_1695067140590.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_random_3_en_5.1.2_3.0_1695067140590.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..598010d1e2a3f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_random_4_en_5.1.2_3.0_1695067322469.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_random_4_en_5.1.2_3.0_1695067322469.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..fb3b62b1c961e1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0_en_5.1.2_3.0_1695066756459.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0_en_5.1.2_3.0_1695066756459.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..5bdb0cc293c0f7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1_en_5.1.2_3.0_1695066976098.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1_en_5.1.2_3.0_1695066976098.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..b98dce2520d857 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2_en_5.1.2_3.0_1695067237324.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2_en_5.1.2_3.0_1695067237324.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..b685fad9a04cf7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3_en_5.1.2_3.0_1695067412841.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3_en_5.1.2_3.0_1695067412841.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..6e35cf45bcdd8c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4_en_5.1.2_3.0_1695067744764.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4_en_5.1.2_3.0_1695067744764.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_chemprot_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.3 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-chemprot-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v2_en.md new file mode 100644 index 00000000000000..e566ae171ded90 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v2_en_5.1.2_3.0_1695062819521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v2_en_5.1.2_3.0_1695062819521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v3_en.md new file mode 100644 index 00000000000000..7e3d46fc31f9b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v3_en_5.1.2_3.0_1695063312027.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v3_en_5.1.2_3.0_1695063312027.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v3_greedy_en.md new file mode 100644 index 00000000000000..7ebb02ab78ee36 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_add_v3_greedy` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v3_greedy_en_5.1.2_3.0_1695066570491.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v3_greedy_en_5.1.2_3.0_1695066570491.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v4_en.md new file mode 100644 index 00000000000000..2182ac0f5de444 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_add_v4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_add_v4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_add_v4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v4_en_5.1.2_3.0_1695063125626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v4_en_5.1.2_3.0_1695063125626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_add_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_add_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_add_v4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-add-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v5_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v5_en.md new file mode 100644 index 00000000000000..3a2c1b64429a1a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_add_v5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_add_v5 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_add_v5 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_add_v5` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v5_en_5.1.2_3.0_1695063569950.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_add_v5_en_5.1.2_3.0_1695063569950.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_add_v5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_add_v5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_add_v5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-add-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_back_translation_en.md new file mode 100644 index 00000000000000..3318ec1a178415 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_back_translation` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_back_translation_en_5.1.2_3.0_1695065043736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_back_translation_en_5.1.2_3.0_1695065043736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_0_en.md new file mode 100644 index 00000000000000..15fabf0fca2af2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_eda_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_0_en_5.1.2_3.0_1695064493620.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_0_en_5.1.2_3.0_1695064493620.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_1_en.md new file mode 100644 index 00000000000000..a4cab28ae5335f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_eda_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_1_en_5.1.2_3.0_1695064569477.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_1_en_5.1.2_3.0_1695064569477.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_2_en.md new file mode 100644 index 00000000000000..1a040d9af1796d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_eda_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_2_en_5.1.2_3.0_1695064658768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_2_en_5.1.2_3.0_1695064658768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_3_en.md new file mode 100644 index 00000000000000..ccf9eb3c74621c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_eda_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_3_en_5.1.2_3.0_1695064758210.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_3_en_5.1.2_3.0_1695064758210.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_4_en.md new file mode 100644 index 00000000000000..e3861a6eabb66b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_eda_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_4_en_5.1.2_3.0_1695065447398.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_eda_4_en_5.1.2_3.0_1695065447398.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_vanilla_en.md new file mode 100644 index 00000000000000..64b75717f8ff6b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_vanilla` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_vanilla_en_5.1.2_3.0_1695067394522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_vanilla_en_5.1.2_3.0_1695067394522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..c2a617c8433bfc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0_en_5.1.2_3.0_1695064640923.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0_en_5.1.2_3.0_1695064640923.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..fafcc952797526 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1_en_5.1.2_3.0_1695064751933.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1_en_5.1.2_3.0_1695064751933.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..03c907d1678016 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2_en_5.1.2_3.0_1695064945303.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2_en_5.1.2_3.0_1695064945303.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..ac63c3912458f6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3_en_5.1.2_3.0_1695065035312.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3_en_5.1.2_3.0_1695065035312.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..53a5d3d19e826f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4_en_5.1.2_3.0_1695065198692.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4_en_5.1.2_3.0_1695065198692.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..542937125942ef --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0_en_5.1.2_3.0_1695068329153.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0_en_5.1.2_3.0_1695068329153.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..166c3053d872d6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1_en_5.1.2_3.0_1695068474188.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1_en_5.1.2_3.0_1695068474188.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..2300a84081b4cd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2_en_5.1.2_3.0_1695068552306.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2_en_5.1.2_3.0_1695068552306.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..cd93393f7193fa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3_en_5.1.2_3.0_1695068640615.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3_en_5.1.2_3.0_1695068640615.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..bfbb8a8e82e962 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4_en_5.1.2_3.0_1695068733768.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4_en_5.1.2_3.0_1695068733768.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..01159c40fb4155 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0_en_5.1.2_3.0_1695067815473.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0_en_5.1.2_3.0_1695067815473.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..2b9a4f483058de --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1_en_5.1.2_3.0_1695067896321.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1_en_5.1.2_3.0_1695067896321.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..93045bec21b032 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2_en_5.1.2_3.0_1695068045326.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2_en_5.1.2_3.0_1695068045326.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..d0a684fc6b7171 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3_en_5.1.2_3.0_1695068117515.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3_en_5.1.2_3.0_1695068117515.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..8c4367d41053da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4_en_5.1.2_3.0_1695068253653.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4_en_5.1.2_3.0_1695068253653.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_citation_intent_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-citation-intent-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v2_en.md new file mode 100644 index 00000000000000..d01cde44543022 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v2_en_5.1.2_3.0_1695063955500.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v2_en_5.1.2_3.0_1695063955500.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v3_en.md new file mode 100644 index 00000000000000..952370d77ab6c5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v3_en_5.1.2_3.0_1695064029052.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v3_en_5.1.2_3.0_1695064029052.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v3_greedy_en.md new file mode 100644 index 00000000000000..c5f6ac07fbcbd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_add_v3_greedy` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v3_greedy_en_5.1.2_3.0_1695066653173.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v3_greedy_en_5.1.2_3.0_1695066653173.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v4_en.md new file mode 100644 index 00000000000000..b57ccc55ad810d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_add_v4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_add_v4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_add_v4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v4_en_5.1.2_3.0_1695063191334.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v4_en_5.1.2_3.0_1695063191334.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_add_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_add_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_add_v4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-add-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v5_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v5_en.md new file mode 100644 index 00000000000000..64bd8d0e0e6992 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_add_v5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_add_v5 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_add_v5 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_add_v5` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v5_en_5.1.2_3.0_1695063666282.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_add_v5_en_5.1.2_3.0_1695063666282.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_add_v5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_add_v5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_add_v5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-add-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_back_translation_en.md new file mode 100644 index 00000000000000..068746474fc8a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_back_translation` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_back_translation_en_5.1.2_3.0_1695065123975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_back_translation_en_5.1.2_3.0_1695065123975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_0_en.md new file mode 100644 index 00000000000000..d919640e6ea9c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_eda_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_0_en_5.1.2_3.0_1695061656434.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_0_en_5.1.2_3.0_1695061656434.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_1_en.md new file mode 100644 index 00000000000000..090af9128c9dee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_eda_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_1_en_5.1.2_3.0_1695061806670.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_1_en_5.1.2_3.0_1695061806670.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_2_en.md new file mode 100644 index 00000000000000..eb1d89161c9ce4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_eda_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_2_en_5.1.2_3.0_1695061974716.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_2_en_5.1.2_3.0_1695061974716.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_3_en.md new file mode 100644 index 00000000000000..5106cc040c63a3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_eda_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_3_en_5.1.2_3.0_1695062165896.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_3_en_5.1.2_3.0_1695062165896.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_4_en.md new file mode 100644 index 00000000000000..efaa5db5af2eaa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_eda_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_4_en_5.1.2_3.0_1695062453783.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_eda_4_en_5.1.2_3.0_1695062453783.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_vanilla_en.md new file mode 100644 index 00000000000000..175da967e887fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_vanilla` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_vanilla_en_5.1.2_3.0_1695067563736.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_vanilla_en_5.1.2_3.0_1695067563736.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..4897610141e9fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0_en_5.1.2_3.0_1695065942415.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0_en_5.1.2_3.0_1695065942415.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..58594d379000fe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1_en_5.1.2_3.0_1695066011521.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1_en_5.1.2_3.0_1695066011521.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..2cf355a4fe7dbe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2_en_5.1.2_3.0_1695065648058.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2_en_5.1.2_3.0_1695065648058.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..103f740961c1ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3_en_5.1.2_3.0_1695066841795.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3_en_5.1.2_3.0_1695066841795.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..47ed5deb92ae95 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4_en_5.1.2_3.0_1695067047043.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4_en_5.1.2_3.0_1695067047043.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..bec891833832e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0_en_5.1.2_3.0_1695066079195.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0_en_5.1.2_3.0_1695066079195.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..62d35f3b10bc5a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1_en_5.1.2_3.0_1695066143519.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1_en_5.1.2_3.0_1695066143519.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..09b71afd20913f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2_en_5.1.2_3.0_1695062777040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2_en_5.1.2_3.0_1695062777040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..a36f6a94deeda3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3_en_5.1.2_3.0_1695062856364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3_en_5.1.2_3.0_1695062856364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..3c72290327a19f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4_en_5.1.2_3.0_1695062920631.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4_en_5.1.2_3.0_1695062920631.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..ff0146f9276b3c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0_en_5.1.2_3.0_1695065820899.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0_en_5.1.2_3.0_1695065820899.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..83263e30d659e6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1_en_5.1.2_3.0_1695065882137.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1_en_5.1.2_3.0_1695065882137.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..9cf02e6fa0632a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2_en_5.1.2_3.0_1695068412804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2_en_5.1.2_3.0_1695068412804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..71bcf3b9beb309 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3_en_5.1.2_3.0_1695068815311.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3_en_5.1.2_3.0_1695068815311.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..6dedbe20c197cf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4_en_5.1.2_3.0_1695068971021.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4_en_5.1.2_3.0_1695068971021.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_rct_sample_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-rct-sample-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v2_en.md new file mode 100644 index 00000000000000..3d11ef71efa4fb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_add_v2_en_5.1.2_3.0_1695062883529.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_add_v2_en_5.1.2_3.0_1695062883529.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v3_en.md new file mode 100644 index 00000000000000..423b456b259d82 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_add_v3_en_5.1.2_3.0_1695063472364.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_add_v3_en_5.1.2_3.0_1695063472364.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v3_greedy_en.md new file mode 100644 index 00000000000000..dbf29d55db23c9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_add_v3_greedy` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_add_v3_greedy_en_5.1.2_3.0_1695066755333.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_add_v3_greedy_en_5.1.2_3.0_1695066755333.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_back_translation_en.md new file mode 100644 index 00000000000000..20b210e27fa13f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_back_translation` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_back_translation_en_5.1.2_3.0_1695065208298.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_back_translation_en_5.1.2_3.0_1695065208298.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_0_en.md new file mode 100644 index 00000000000000..7d838319e59033 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_eda_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_0_en_5.1.2_3.0_1695062601285.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_0_en_5.1.2_3.0_1695062601285.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_1_en.md new file mode 100644 index 00000000000000..2bf2f456484ebe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_eda_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_1_en_5.1.2_3.0_1695062683576.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_1_en_5.1.2_3.0_1695062683576.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_2_en.md new file mode 100644 index 00000000000000..ae37966808d6b6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_eda_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_2_en_5.1.2_3.0_1695062761915.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_2_en_5.1.2_3.0_1695062761915.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_3_en.md new file mode 100644 index 00000000000000..0ccb2c61e72fbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_eda_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_3_en_5.1.2_3.0_1695062915099.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_3_en_5.1.2_3.0_1695062915099.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_4_en.md new file mode 100644 index 00000000000000..a414c8fed903d7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_eda_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_4_en_5.1.2_3.0_1695065361942.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_eda_4_en_5.1.2_3.0_1695065361942.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_vanilla_en.md new file mode 100644 index 00000000000000..e5af7f0ac519df --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_vanilla` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_vanilla_en_5.1.2_3.0_1695067737806.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_vanilla_en_5.1.2_3.0_1695067737806.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..e4d7e2028c25a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0_en_5.1.2_3.0_1695065711115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0_en_5.1.2_3.0_1695065711115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..1da91f90b667dd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1_en_5.1.2_3.0_1695065780276.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1_en_5.1.2_3.0_1695065780276.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..b021c22d14cfca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2_en_5.1.2_3.0_1695065979772.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2_en_5.1.2_3.0_1695065979772.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..1aa26b12b9fa63 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3_en_5.1.2_3.0_1695066049985.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3_en_5.1.2_3.0_1695066049985.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..1b470555f4cc62 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4_en_5.1.2_3.0_1695066115015.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4_en_5.1.2_3.0_1695066115015.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..bd2eb6f1daab94 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_random_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_0_en_5.1.2_3.0_1695067506121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_0_en_5.1.2_3.0_1695067506121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..cf68eb7e85c88a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_random_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_1_en_5.1.2_3.0_1695067589057.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_1_en_5.1.2_3.0_1695067589057.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..a0e90e2d448c70 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_random_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_2_en_5.1.2_3.0_1695067665091.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_2_en_5.1.2_3.0_1695067665091.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..d5ff4a97afe495 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_random_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_3_en_5.1.2_3.0_1695067959147.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_3_en_5.1.2_3.0_1695067959147.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..7dee7787fe746d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_random_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_4_en_5.1.2_3.0_1695068182594.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_random_4_en_5.1.2_3.0_1695068182594.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..feb622894c946a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0_en_5.1.2_3.0_1695065753378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0_en_5.1.2_3.0_1695065753378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..48d3b6e2fcb6ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1_en_5.1.2_3.0_1695069332405.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1_en_5.1.2_3.0_1695069332405.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..ec46f1776484e0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2_en_5.1.2_3.0_1695061662249.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2_en_5.1.2_3.0_1695061662249.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..66e79f8f50cf86 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3_en_5.1.2_3.0_1695061817028.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3_en_5.1.2_3.0_1695061817028.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..9409c0cf1c9786 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4_en_5.1.2_3.0_1695061987104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4_en_5.1.2_3.0_1695061987104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_sciie_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-sciie-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2_en.md new file mode 100644 index 00000000000000..796205a6ea86d4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2_en_5.1.2_3.0_1695063017835.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2_en_5.1.2_3.0_1695063017835.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_en.md new file mode 100644 index 00000000000000..4e439c50ebb443 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_en_5.1.2_3.0_1695063651684.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_en_5.1.2_3.0_1695063651684.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy_en.md new file mode 100644 index 00000000000000..42c75f082ca8a8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy_en_5.1.2_3.0_1695066988320.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy_en_5.1.2_3.0_1695066988320.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4_en.md new file mode 100644 index 00000000000000..718ba3a88993bf --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4_en_5.1.2_3.0_1695063402223.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4_en_5.1.2_3.0_1695063402223.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_add_v4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-add-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5_en.md new file mode 100644 index 00000000000000..bda2574b91d482 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5_en_5.1.2_3.0_1695063908626.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5_en_5.1.2_3.0_1695063908626.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_add_v5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-add-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation_en.md new file mode 100644 index 00000000000000..b001f181a9ea2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation_en_5.1.2_3.0_1695065600854.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation_en_5.1.2_3.0_1695065600854.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0_en.md new file mode 100644 index 00000000000000..b227fe0b30d0a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0_en_5.1.2_3.0_1695064543714.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0_en_5.1.2_3.0_1695064543714.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1_en.md new file mode 100644 index 00000000000000..7f8262682f1d9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1_en_5.1.2_3.0_1695065376115.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1_en_5.1.2_3.0_1695065376115.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2_en.md new file mode 100644 index 00000000000000..3339d5618b9b17 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2_en_5.1.2_3.0_1695064863104.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2_en_5.1.2_3.0_1695064863104.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3_en.md new file mode 100644 index 00000000000000..99924af8436e22 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3_en_5.1.2_3.0_1695065115894.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3_en_5.1.2_3.0_1695065115894.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4_en.md new file mode 100644 index 00000000000000..c947119de7f3a0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4_en_5.1.2_3.0_1695065287936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4_en_5.1.2_3.0_1695065287936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla_en.md new file mode 100644 index 00000000000000..c3a15cbced256c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla_en_5.1.2_3.0_1695068173633.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla_en_5.1.2_3.0_1695068173633.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..e28f83545e3efe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0_en_5.1.2_3.0_1695065442742.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0_en_5.1.2_3.0_1695065442742.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..685e51c6a2978b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1_en_5.1.2_3.0_1695065505528.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1_en_5.1.2_3.0_1695065505528.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..d8a4a52a45fa8e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2_en_5.1.2_3.0_1695065580077.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2_en_5.1.2_3.0_1695065580077.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..60d3f74a9fe59c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3_en_5.1.2_3.0_1695062334761.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3_en_5.1.2_3.0_1695062334761.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..6ea9fe74ed5ee6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4_en_5.1.2_3.0_1695062479803.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4_en_5.1.2_3.0_1695062479803.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..a574f427c2d027 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0_en_5.1.2_3.0_1695069388140.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0_en_5.1.2_3.0_1695069388140.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..0552195b65312b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1_en_5.1.2_3.0_1695061745442.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1_en_5.1.2_3.0_1695061745442.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..44c2f1f5821f49 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2_en_5.1.2_3.0_1695061904359.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2_en_5.1.2_3.0_1695061904359.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..8e057f6a906d9c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3_en_5.1.2_3.0_1695062074718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3_en_5.1.2_3.0_1695062074718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..33b1ff921632e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4_en_5.1.2_3.0_1695062164729.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4_en_5.1.2_3.0_1695062164729.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..7fb59dabf972da --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0_en_5.1.2_3.0_1695066231533.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0_en_5.1.2_3.0_1695066231533.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..7d1cd475286ccd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1_en_5.1.2_3.0_1695062254406.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1_en_5.1.2_3.0_1695062254406.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..5a1f651a5a5012 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2_en_5.1.2_3.0_1695062413251.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2_en_5.1.2_3.0_1695062413251.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..a4764cfe5805e2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3_en_5.1.2_3.0_1695062547268.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3_en_5.1.2_3.0_1695062547268.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..690639267d00a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4_en_5.1.2_3.0_1695062628121.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4_en_5.1.2_3.0_1695062628121.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_emotion_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-emotion-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v2_en.md new file mode 100644 index 00000000000000..f5dc984b96b5b0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v2_en_5.1.2_3.0_1695063238031.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v2_en_5.1.2_3.0_1695063238031.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_en.md new file mode 100644 index 00000000000000..63c4193415d0a1 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_en_5.1.2_3.0_1695063733087.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_en_5.1.2_3.0_1695063733087.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy_en.md new file mode 100644 index 00000000000000..147e35662a654d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy_en_5.1.2_3.0_1695066918422.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy_en_5.1.2_3.0_1695066918422.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v4_en.md new file mode 100644 index 00000000000000..ff1dffb437b48b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_add_v4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_add_v4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_add_v4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v4_en_5.1.2_3.0_1695063329939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v4_en_5.1.2_3.0_1695063329939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_add_v4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-add-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v5_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v5_en.md new file mode 100644 index 00000000000000..5fa6af16876097 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_add_v5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_add_v5 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_add_v5 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_add_v5` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v5_en_5.1.2_3.0_1695063831720.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_add_v5_en_5.1.2_3.0_1695063831720.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_add_v5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_add_v5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-add-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_back_translation_en.md new file mode 100644 index 00000000000000..ddecc90433c0b3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_back_translation` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_back_translation_en_5.1.2_3.0_1695065521901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_back_translation_en_5.1.2_3.0_1695065521901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_0_en.md new file mode 100644 index 00000000000000..3eaff3c18355aa --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_eda_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_0_en_5.1.2_3.0_1695063567675.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_0_en_5.1.2_3.0_1695063567675.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_1_en.md new file mode 100644 index 00000000000000..d8af0fd892b4c0 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_eda_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_1_en_5.1.2_3.0_1695063744010.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_1_en_5.1.2_3.0_1695063744010.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_2_en.md new file mode 100644 index 00000000000000..a167176e0f5af2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_eda_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_2_en_5.1.2_3.0_1695063981698.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_2_en_5.1.2_3.0_1695063981698.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_3_en.md new file mode 100644 index 00000000000000..d12216d819b9e9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_eda_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_3_en_5.1.2_3.0_1695064129822.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_3_en_5.1.2_3.0_1695064129822.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_4_en.md new file mode 100644 index 00000000000000..e0e4f461eb7954 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_eda_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_4_en_5.1.2_3.0_1695064283114.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_eda_4_en_5.1.2_3.0_1695064283114.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_vanilla_en.md new file mode 100644 index 00000000000000..52378655a34fac --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_vanilla` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_vanilla_en_5.1.2_3.0_1695068105957.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_vanilla_en_5.1.2_3.0_1695068105957.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..1e6d137f32f6a7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0_en_5.1.2_3.0_1695061735581.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0_en_5.1.2_3.0_1695061735581.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..6c71b0b2cdffd7 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1_en_5.1.2_3.0_1695061884317.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1_en_5.1.2_3.0_1695061884317.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..f47ec888b13609 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2_en_5.1.2_3.0_1695062075224.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2_en_5.1.2_3.0_1695062075224.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..50bcfd5b21a393 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3_en_5.1.2_3.0_1695062319093.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3_en_5.1.2_3.0_1695062319093.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..e178ea76dd316c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4_en_5.1.2_3.0_1695062839881.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4_en_5.1.2_3.0_1695062839881.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..d3191be1953fb9 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0_en_5.1.2_3.0_1695068905936.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0_en_5.1.2_3.0_1695068905936.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..e883c895a62b2d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1_en_5.1.2_3.0_1695069032975.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1_en_5.1.2_3.0_1695069032975.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..36a8fa8c2cf672 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2_en_5.1.2_3.0_1695069156917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2_en_5.1.2_3.0_1695069156917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..3360e5d25b7f92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3_en_5.1.2_3.0_1695069218917.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3_en_5.1.2_3.0_1695069218917.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..df5fe7179812a2 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4_en_5.1.2_3.0_1695069277201.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4_en_5.1.2_3.0_1695069277201.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..8bd508e5d2b95a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0_en_5.1.2_3.0_1695068812882.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0_en_5.1.2_3.0_1695068812882.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..5e734019c7a282 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1_en_5.1.2_3.0_1695068964030.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1_en_5.1.2_3.0_1695068964030.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..c7a8bbb0973c83 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2_en_5.1.2_3.0_1695069189667.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2_en_5.1.2_3.0_1695069189667.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..5d72aaacfa5f0e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3_en_5.1.2_3.0_1695069302240.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3_en_5.1.2_3.0_1695069302240.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..4e695f67932e81 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4_en_5.1.2_3.0_1695061559983.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4_en_5.1.2_3.0_1695061559983.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_hate_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-hate-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v2_en.md new file mode 100644 index 00000000000000..53c4e45f0435fc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_add_v2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_add_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_add_v2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v2_en_5.1.2_3.0_1695063168425.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v2_en_5.1.2_3.0_1695063168425.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_add_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-add-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_en.md new file mode 100644 index 00000000000000..918c57afe16b15 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_add_v3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_add_v3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_add_v3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_en_5.1.2_3.0_1695063815484.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_en_5.1.2_3.0_1695063815484.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_add_v3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-add-v3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy_en.md new file mode 100644 index 00000000000000..9da0c7e8cef7fd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy_en_5.1.2_3.0_1695066841376.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy_en_5.1.2_3.0_1695066841376.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_add_v3_greedy| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-add-v3-greedy \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v4_en.md new file mode 100644 index 00000000000000..a7183e23724b24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_add_v4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_add_v4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_add_v4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v4_en_5.1.2_3.0_1695063264391.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v4_en_5.1.2_3.0_1695063264391.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_add_v4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-add-v4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v5_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v5_en.md new file mode 100644 index 00000000000000..a00f318ceff60e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_add_v5_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_add_v5 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_add_v5 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_add_v5` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v5_en_5.1.2_3.0_1695063751751.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_add_v5_en_5.1.2_3.0_1695063751751.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v5","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_add_v5", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_add_v5| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-add-v5 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_back_translation_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_back_translation_en.md new file mode 100644 index 00000000000000..0dc5e66b2eeaf8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_back_translation_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_back_translation AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_back_translation +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_back_translation` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_back_translation_en_5.1.2_3.0_1695065295611.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_back_translation_en_5.1.2_3.0_1695065295611.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_back_translation","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_back_translation", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_back_translation| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-back-translation \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_0_en.md new file mode 100644 index 00000000000000..44897f858303a6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_eda_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_eda_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_eda_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_0_en_5.1.2_3.0_1695062983180.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_0_en_5.1.2_3.0_1695062983180.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_eda_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-eda-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_1_en.md new file mode 100644 index 00000000000000..562edfd99994ed --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_eda_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_eda_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_eda_1` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_1_en_5.1.2_3.0_1695063053385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_1_en_5.1.2_3.0_1695063053385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_eda_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-eda-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_2_en.md new file mode 100644 index 00000000000000..12625144b03a92 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_eda_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_eda_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_eda_2` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_2_en_5.1.2_3.0_1695063183295.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_2_en_5.1.2_3.0_1695063183295.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_eda_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-eda-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_3_en.md new file mode 100644 index 00000000000000..04fd93108ddf34 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_eda_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_eda_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_eda_3` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_3_en_5.1.2_3.0_1695063247843.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_3_en_5.1.2_3.0_1695063247843.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_eda_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-eda-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_4_en.md new file mode 100644 index 00000000000000..0980685e45b782 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_eda_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_eda_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_eda_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_eda_4` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_4_en_5.1.2_3.0_1695063390616.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_eda_4_en_5.1.2_3.0_1695063390616.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_eda_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_eda_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-eda-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_vanilla_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_vanilla_en.md new file mode 100644 index 00000000000000..dcae8e0298dbca --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_vanilla_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_vanilla AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_vanilla +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_vanilla` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_vanilla_en_5.1.2_3.0_1695067812718.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_vanilla_en_5.1.2_3.0_1695067812718.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_vanilla","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_vanilla", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_vanilla| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-vanilla \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0_en.md new file mode 100644 index 00000000000000..03e1b745a614af --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0` is an English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0_en_5.1.2_3.0_1695067652571.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0_en_5.1.2_3.0_1695067652571.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-embedding-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1_en.md new file mode 100644 index 00000000000000..e6a23553e8df1b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1_en_5.1.2_3.0_1695067887532.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1_en_5.1.2_3.0_1695067887532.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-embedding-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2_en.md new file mode 100644 index 00000000000000..417a99c46e5cbd --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2_en_5.1.2_3.0_1695067953535.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2_en_5.1.2_3.0_1695067953535.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-embedding-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3_en.md new file mode 100644 index 00000000000000..4bf749b4530064 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3_en_5.1.2_3.0_1695068239907.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3_en_5.1.2_3.0_1695068239907.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-embedding-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4_en.md new file mode 100644 index 00000000000000..a7c7b15ec34230 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4_en_5.1.2_3.0_1695068320901.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4_en_5.1.2_3.0_1695068320901.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_embedding_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-embedding-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0_en.md new file mode 100644 index 00000000000000..1dfa404b2be8bb --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0_en_5.1.2_3.0_1695068406014.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0_en_5.1.2_3.0_1695068406014.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-random-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1_en.md new file mode 100644 index 00000000000000..1cbd216e5592c3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1_en_5.1.2_3.0_1695068465760.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1_en_5.1.2_3.0_1695068465760.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-random-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2_en.md new file mode 100644 index 00000000000000..fc039f4e347847 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2_en_5.1.2_3.0_1695068549948.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2_en_5.1.2_3.0_1695068549948.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-random-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3_en.md new file mode 100644 index 00000000000000..762fc9e9a8e416 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3_en_5.1.2_3.0_1695068636878.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3_en_5.1.2_3.0_1695068636878.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-random-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4_en.md new file mode 100644 index 00000000000000..8194ba5ec31329 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4_en_5.1.2_3.0_1695068733629.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4_en_5.1.2_3.0_1695068733629.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +# The classifier reads a "token" column, so tokenize the documents first +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +// The classifier reads a "token" column, so tokenize the documents first +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_random_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-random-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0_en.md new file mode 100644 index 00000000000000..478eaf9524ae7b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0_en_5.1.2_3.0_1695065669939.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0_en_5.1.2_3.0_1695065669939.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_0| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-synonym-0 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1_en.md new file mode 100644 index 00000000000000..3e17025c6f63be --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1` is an English model originally trained by m3.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1_en_5.1.2_3.0_1695066320040.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1_en_5.1.2_3.0_1695066320040.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-synonym-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2_en.md new file mode 100644 index 00000000000000..29e327a48f9ad6 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2_en_5.1.2_3.0_1695067055008.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2_en_5.1.2_3.0_1695067055008.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-synonym-2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3_en.md new file mode 100644 index 00000000000000..6f137756072323 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3_en_5.1.2_3.0_1695067235586.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3_en_5.1.2_3.0_1695067235586.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_3| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-synonym-3 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4_en.md b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4_en.md new file mode 100644 index 00000000000000..a757614ada68db --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4 AlbertForSequenceClassification from m3 +author: John Snow Labs +name: m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4` is a English model originally trained by m3. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4_en_5.1.2_3.0_1695067485305.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4_en_5.1.2_3.0_1695067485305.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m3_experiment_albert_base_v2_tweet_eval_irony_word_swapping_synonym_4| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/m3/m3-experiment-albert-base-v2-tweet-eval-irony-word-swapping-synonym-4 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-mnli_albert_base_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-mnli_albert_base_v2_en.md new file mode 100644 index 00000000000000..29e71daaf10d5c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-mnli_albert_base_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English mnli_albert_base_v2 AlbertForSequenceClassification from boychaboy +author: John Snow Labs +name: mnli_albert_base_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`mnli_albert_base_v2` is a English model originally trained by boychaboy. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/mnli_albert_base_v2_en_5.1.2_3.0_1695065135816.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/mnli_albert_base_v2_en_5.1.2_3.0_1695065135816.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("mnli_albert_base_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("mnli_albert_base_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|mnli_albert_base_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/boychaboy/MNLI_albert-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-nosql_identifier_albert_en.md b/docs/_posts/ahmedlone127/2023-09-18-nosql_identifier_albert_en.md new file mode 100644 index 00000000000000..c6d44038539605 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-nosql_identifier_albert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English nosql_identifier_albert AlbertForSequenceClassification from ankush-003 +author: John Snow Labs +name: nosql_identifier_albert +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`nosql_identifier_albert` is a English model originally trained by ankush-003. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/nosql_identifier_albert_en_5.1.2_3.0_1695066381574.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/nosql_identifier_albert_en_5.1.2_3.0_1695066381574.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("nosql_identifier_albert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("nosql_identifier_albert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|nosql_identifier_albert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/ankush-003/nosql-identifier-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-sst2_eda_albert_en.md b/docs/_posts/ahmedlone127/2023-09-18-sst2_eda_albert_en.md new file mode 100644 index 00000000000000..51ab1bdb5f807f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-sst2_eda_albert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English sst2_eda_albert AlbertForSequenceClassification from o2poi +author: John Snow Labs +name: sst2_eda_albert +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`sst2_eda_albert` is a English model originally trained by o2poi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/sst2_eda_albert_en_5.1.2_3.0_1695065782859.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/sst2_eda_albert_en_5.1.2_3.0_1695065782859.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("sst2_eda_albert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("sst2_eda_albert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|sst2_eda_albert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/o2poi/sst2-eda-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-trecdl22_crossencoder_albert_en.md b/docs/_posts/ahmedlone127/2023-09-18-trecdl22_crossencoder_albert_en.md new file mode 100644 index 00000000000000..5cf75f40893e24 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-trecdl22_crossencoder_albert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English trecdl22_crossencoder_albert AlbertForSequenceClassification from naver +author: John Snow Labs +name: trecdl22_crossencoder_albert +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`trecdl22_crossencoder_albert` is a English model originally trained by naver. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/trecdl22_crossencoder_albert_en_5.1.2_3.0_1695067000082.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/trecdl22_crossencoder_albert_en_5.1.2_3.0_1695067000082.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("trecdl22_crossencoder_albert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("trecdl22_crossencoder_albert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|trecdl22_crossencoder_albert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|425.6 MB| + +## References + +https://huggingface.co/naver/trecdl22-crossencoder-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-18-yhi_message_type_paraphrase_albert_small_v2_en.md b/docs/_posts/ahmedlone127/2023-09-18-yhi_message_type_paraphrase_albert_small_v2_en.md new file mode 100644 index 00000000000000..d8468214f0640f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-18-yhi_message_type_paraphrase_albert_small_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English yhi_message_type_paraphrase_albert_small_v2 AlbertForSequenceClassification from cvapict +author: John Snow Labs +name: yhi_message_type_paraphrase_albert_small_v2 +date: 2023-09-18 +tags: [albert, en, open_source, sequence_classification, onnx] +task: Text Classification +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForSequenceClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForSequenceClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`yhi_message_type_paraphrase_albert_small_v2` is a English model originally trained by cvapict. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/yhi_message_type_paraphrase_albert_small_v2_en_5.1.2_3.0_1695068956108.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/yhi_message_type_paraphrase_albert_small_v2_en_5.1.2_3.0_1695068956108.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForSequenceClassification.pretrained("yhi_message_type_paraphrase_albert_small_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("class") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForSequenceClassification + .pretrained("yhi_message_type_paraphrase_albert_small_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("class") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|yhi_message_type_paraphrase_albert_small_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[class]| +|Language:|en| +|Size:|44.2 MB| + +## References + +https://huggingface.co/cvapict/yhi-message-type-paraphrase-albert-small-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_base_finetuned_recipeqa_modified_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_base_finetuned_recipeqa_modified_en.md new file mode 100644 index 00000000000000..705840f9b3c83b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_base_finetuned_recipeqa_modified_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_finetuned_recipeqa_modified AlbertForQuestionAnswering from tamhuynh27 +author: John Snow Labs +name: albert_base_finetuned_recipeqa_modified +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_base_finetuned_recipeqa_modified` is a English model originally trained by tamhuynh27. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_finetuned_recipeqa_modified_en_5.1.2_3.0_1695096558497.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_finetuned_recipeqa_modified_en_5.1.2_3.0_1695096558497.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_base_finetuned_recipeqa_modified","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_base_finetuned_recipeqa_modified", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_finetuned_recipeqa_modified| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/tamhuynh27/albert-base-finetuned-recipeqa-modified \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_base_qa_squad2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_base_qa_squad2_en.md new file mode 100644 index 00000000000000..5d87cc00b0df8d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_base_qa_squad2_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from twmkn9) +author: John Snow Labs +name: albert_base_qa_squad2 +date: 2023-09-19 +tags: [open_source, albert, question_answering, en, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-base-v2-squad2` is an English model originally trained by `twmkn9`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_qa_squad2_en_5.1.2_3.0_1695099216275.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_qa_squad2_en_5.1.2_3.0_1695099216275.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_base_qa_squad2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_base_qa_squad2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.span_question.albert").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_qa_squad2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +https://huggingface.co/twmkn9/albert-base-v2-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_base_v2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_base_v2_en.md new file mode 100644 index 00000000000000..8d2056a3f57e0c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_base_v2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2 AlbertForTokenClassification from vumichien +author: John Snow Labs +name: albert_base_v2 +date: 2023-09-19 +tags: [albert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2` is an English model originally trained by vumichien. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_en_5.1.2_3.0_1695087778578.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_en_5.1.2_3.0_1695087778578.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_base_v2","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_base_v2", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/vumichien/albert-base-v2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_base_v2_finetuned_squad_attempt_1_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_base_v2_finetuned_squad_attempt_1_en.md new file mode 100644 index 00000000000000..52b55dde7ac724 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_base_v2_finetuned_squad_attempt_1_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_base_v2_finetuned_squad_attempt_1 AlbertForQuestionAnswering from tyuukau +author: John Snow Labs +name: albert_base_v2_finetuned_squad_attempt_1 +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_base_v2_finetuned_squad_attempt_1` is an English model originally trained by tyuukau. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_squad_attempt_1_en_5.1.2_3.0_1695096771216.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_base_v2_finetuned_squad_attempt_1_en_5.1.2_3.0_1695096771216.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_base_v2_finetuned_squad_attempt_1","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_base_v2_finetuned_squad_attempt_1", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_base_v2_finetuned_squad_attempt_1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/tyuukau/albert-base-v2-finetuned-squad-attempt-1 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_for_question_answering_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_for_question_answering_en.md new file mode 100644 index 00000000000000..593133c6ae3854 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_for_question_answering_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_for_question_answering AlbertForQuestionAnswering from Zamachi +author: John Snow Labs +name: albert_for_question_answering +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_for_question_answering` is an English model originally trained by Zamachi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_for_question_answering_en_5.1.2_3.0_1695096011865.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_for_question_answering_en_5.1.2_3.0_1695096011865.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_for_question_answering","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_for_question_answering", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_for_question_answering| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|41.9 MB| + +## References + +https://huggingface.co/Zamachi/albert-for-question-answering \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_conll2003_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_conll2003_en.md new file mode 100644 index 00000000000000..03b26b4339a408 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_conll2003_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_ner_conll2003 AlbertForTokenClassification from Gladiator +author: John Snow Labs +name: albert_large_v2_ner_conll2003 +date: 2023-09-19 +tags: [albert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_ner_conll2003` is an English model originally trained by Gladiator. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_ner_conll2003_en_5.1.2_3.0_1695087646804.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_ner_conll2003_en_5.1.2_3.0_1695087646804.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_large_v2_ner_conll2003","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_large_v2_ner_conll2003", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_ner_conll2003| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|62.8 MB| + +## References + +https://huggingface.co/Gladiator/albert-large-v2_ner_conll2003 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_wikiann_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_wikiann_en.md new file mode 100644 index 00000000000000..c90b0b6ecf37f4 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_wikiann_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_ner_wikiann AlbertForTokenClassification from Gladiator +author: John Snow Labs +name: albert_large_v2_ner_wikiann +date: 2023-09-19 +tags: [albert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_ner_wikiann` is an English model originally trained by Gladiator. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_ner_wikiann_en_5.1.2_3.0_1695087727017.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_ner_wikiann_en_5.1.2_3.0_1695087727017.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_large_v2_ner_wikiann","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_large_v2_ner_wikiann", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_ner_wikiann| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|62.8 MB| + +## References + +https://huggingface.co/Gladiator/albert-large-v2_ner_wikiann \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_wnut_17_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_wnut_17_en.md new file mode 100644 index 00000000000000..eab66a127c975f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_ner_wnut_17_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_ner_wnut_17 AlbertForTokenClassification from Gladiator +author: John Snow Labs +name: albert_large_v2_ner_wnut_17 +date: 2023-09-19 +tags: [albert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_ner_wnut_17` is an English model originally trained by Gladiator. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_ner_wnut_17_en_5.1.2_3.0_1695087559225.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_ner_wnut_17_en_5.1.2_3.0_1695087559225.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_large_v2_ner_wnut_17","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_large_v2_ner_wnut_17", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_ner_wnut_17| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|62.8 MB| + +## References + +https://huggingface.co/Gladiator/albert-large-v2_ner_wnut_17 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_spoken_squad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_spoken_squad_en.md new file mode 100644 index 00000000000000..2cbc7eabd28291 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_large_v2_spoken_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_large_v2_spoken_squad AlbertForQuestionAnswering from AshtonIsNotHere +author: John Snow Labs +name: albert_large_v2_spoken_squad +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_large_v2_spoken_squad` is an English model originally trained by AshtonIsNotHere. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_large_v2_spoken_squad_en_5.1.2_3.0_1695096969866.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_large_v2_spoken_squad_en_5.1.2_3.0_1695096969866.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_large_v2_spoken_squad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_large_v2_spoken_squad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_large_v2_spoken_squad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|62.7 MB| + +## References + +https://huggingface.co/AshtonIsNotHere/albert-large-v2-spoken-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_finetuned_squad_en.md new file mode 100644 index 00000000000000..0b9e09e9519397 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_finetuned_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_persian_farsi_base_v2_finetuned_squad AlbertForQuestionAnswering from mhmsadegh +author: John Snow Labs +name: albert_persian_farsi_base_v2_finetuned_squad +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_persian_farsi_base_v2_finetuned_squad` is an English model originally trained by mhmsadegh. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_finetuned_squad_en_5.1.2_3.0_1695098037048.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_finetuned_squad_en_5.1.2_3.0_1695098037048.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_persian_farsi_base_v2_finetuned_squad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_persian_farsi_base_v2_finetuned_squad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_finetuned_squad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|66.3 MB| + +## References + +https://huggingface.co/mhmsadegh/albert-fa-base-v2-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_ner_arman_fa.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_ner_arman_fa.md new file mode 100644 index 00000000000000..499823e497a777 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_ner_arman_fa.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Persian albert_persian_farsi_base_v2_ner_arman AlbertForTokenClassification from m3hrdadfi +author: John Snow Labs +name: albert_persian_farsi_base_v2_ner_arman +date: 2023-09-19 +tags: [albert, fa, open_source, token_classification, onnx] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_ner_arman` is a Persian model originally trained by m3hrdadfi. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_ner_arman_fa_5.1.2_3.0_1695087624458.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_ner_arman_fa_5.1.2_3.0_1695087624458.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_persian_farsi_base_v2_ner_arman","fa") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_persian_farsi_base_v2_ner_arman", "fa") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_ner_arman| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|fa| +|Size:|66.4 MB| + +## References + +https://huggingface.co/m3hrdadfi/albert-fa-base-v2-ner-arman \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_ner_peyma_fa.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_ner_peyma_fa.md new file mode 100644 index 00000000000000..5319deb4092215 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_ner_peyma_fa.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Persian albert_persian_farsi_base_v2_ner_peyma AlbertForTokenClassification from m3hrdadfi +author: John Snow Labs +name: albert_persian_farsi_base_v2_ner_peyma +date: 2023-09-19 +tags: [albert, fa, open_source, token_classification, onnx] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_ner_peyma` is a Persian model originally trained by m3hrdadfi. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_ner_peyma_fa_5.1.2_3.0_1695087715650.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_ner_peyma_fa_5.1.2_3.0_1695087715650.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_persian_farsi_base_v2_ner_peyma","fa") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_persian_farsi_base_v2_ner_peyma", "fa") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_ner_peyma| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|fa| +|Size:|66.4 MB| + +## References + +https://huggingface.co/m3hrdadfi/albert-fa-base-v2-ner-peyma \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_parsquad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_parsquad_en.md new file mode 100644 index 00000000000000..1ca217c2832ed5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_parsquad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_persian_farsi_base_v2_parsquad AlbertForQuestionAnswering from mohsenfayyaz +author: John Snow Labs +name: albert_persian_farsi_base_v2_parsquad +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_parsquad` is a English model originally trained by mohsenfayyaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_parsquad_en_5.1.2_3.0_1695098252255.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_parsquad_en_5.1.2_3.0_1695098252255.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_persian_farsi_base_v2_parsquad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_persian_farsi_base_v2_parsquad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_parsquad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|66.3 MB| + +## References + +https://huggingface.co/mohsenfayyaz/albert-fa-base-v2_parsquad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_persian_qa_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_persian_qa_en.md new file mode 100644 index 00000000000000..ede1f6eb70f62d --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_persian_qa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_persian_farsi_base_v2_persian_qa AlbertForQuestionAnswering from mohsenfayyaz +author: John Snow Labs +name: albert_persian_farsi_base_v2_persian_qa +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_persian_farsi_base_v2_persian_qa` is an English model originally trained by mohsenfayyaz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_persian_qa_en_5.1.2_3.0_1695098180715.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_persian_qa_en_5.1.2_3.0_1695098180715.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_persian_farsi_base_v2_persian_qa","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_persian_farsi_base_v2_persian_qa", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_persian_qa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|66.3 MB| + +## References + +https://huggingface.co/mohsenfayyaz/albert-fa-base-v2_persian_qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_pquad_and_persian_qa_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_pquad_and_persian_qa_en.md new file mode 100644 index 00000000000000..00570a14d286bc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_pquad_and_persian_qa_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_persian_farsi_base_v2_pquad_and_persian_qa AlbertForQuestionAnswering from mohsenfayyaz +author: John Snow Labs +name: albert_persian_farsi_base_v2_pquad_and_persian_qa +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_persian_farsi_base_v2_pquad_and_persian_qa` is an English model originally trained by mohsenfayyaz. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_pquad_and_persian_qa_en_5.1.2_3.0_1695098320355.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_pquad_and_persian_qa_en_5.1.2_3.0_1695098320355.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_persian_farsi_base_v2_pquad_and_persian_qa","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_persian_farsi_base_v2_pquad_and_persian_qa", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_pquad_and_persian_qa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|66.3 MB| + +## References + +https://huggingface.co/mohsenfayyaz/albert-fa-base-v2_pquad_and_persian_qa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_pquad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_pquad_en.md new file mode 100644 index 00000000000000..220f0ba4c0d54b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_base_v2_pquad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_persian_farsi_base_v2_pquad AlbertForQuestionAnswering from mohsenfayyaz +author: John Snow Labs +name: albert_persian_farsi_base_v2_pquad +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_base_v2_pquad` is a English model originally trained by mohsenfayyaz. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_pquad_en_5.1.2_3.0_1695098109290.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_base_v2_pquad_en_5.1.2_3.0_1695098109290.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_persian_farsi_base_v2_pquad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_persian_farsi_base_v2_pquad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_base_v2_pquad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|66.3 MB| + +## References + +https://huggingface.co/mohsenfayyaz/albert-fa-base-v2_pquad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_zwnj_base_v2_ner_fa.md b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_zwnj_base_v2_ner_fa.md new file mode 100644 index 00000000000000..958dd83bf9889e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_persian_farsi_zwnj_base_v2_ner_fa.md @@ -0,0 +1,93 @@ +--- +layout: model +title: Persian albert_persian_farsi_zwnj_base_v2_ner AlbertForTokenClassification from HooshvareLab +author: John Snow Labs +name: albert_persian_farsi_zwnj_base_v2_ner +date: 2023-09-19 +tags: [albert, fa, open_source, token_classification, onnx] +task: Named Entity Recognition +language: fa +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP.`albert_persian_farsi_zwnj_base_v2_ner` is a Persian model originally trained by HooshvareLab. 
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_zwnj_base_v2_ner_fa_5.1.2_3.0_1695087008371.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_persian_farsi_zwnj_base_v2_ner_fa_5.1.2_3.0_1695087008371.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + +tokenizer = Tokenizer() \ + .setInputCols(["documents"]) \ + .setOutputCol("token") + +sequenceClassifier = AlbertForTokenClassification.pretrained("albert_persian_farsi_zwnj_base_v2_ner","fa") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, tokenizer, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val tokenizer = new Tokenizer() + .setInputCols(Array("documents")) + .setOutputCol("token") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("albert_persian_farsi_zwnj_base_v2_ner", "fa") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, tokenizer, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_persian_farsi_zwnj_base_v2_ner| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|fa| +|Size:|42.0 MB| + +## References + +https://huggingface.co/HooshvareLab/albert-fa-zwnj-base-v2-ner \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_base_v2_squad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_base_v2_squad_en.md new file mode 100644 index 00000000000000..1677838e86281e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_base_v2_squad_en.md @@ -0,0 +1,99 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from madlag) +author: John Snow Labs +name: albert_qa_base_v2_squad +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-base-v2-squad` is a English model originally trained by `madlag`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_base_v2_squad_en_5.1.2_3.0_1695098215184.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_base_v2_squad_en_5.1.2_3.0_1695098215184.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_base_v2_squad","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_base_v2_squad","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squad.albert.base_v2").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_base_v2_squad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +- https://huggingface.co/madlag/albert-base-v2-squad +- https://github.com/google-research/albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_base_v2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_base_v2_en.md new file mode 100644 index 00000000000000..34b2c420f80759 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_base_v2_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English AlbertForQuestionAnswering Base model (from elgeish) +author: John Snow Labs +name: albert_qa_cs224n_squad2.0_base_v2 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cs224n-squad2.0-albert-base-v2` is a English model originally trained by `elgeish`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_cs224n_squad2.0_base_v2_en_5.1.2_3.0_1695097578480.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_cs224n_squad2.0_base_v2_en_5.1.2_3.0_1695097578480.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_cs224n_squad2.0_base_v2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_cs224n_squad2.0_base_v2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.base_v2.by_elgeish").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_cs224n_squad2.0_base_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +- https://huggingface.co/elgeish/cs224n-squad2.0-albert-base-v2 +- http://web.stanford.edu/class/cs224n/project/default-final-project-handout.pdf +- https://rajpurkar.github.io/SQuAD-explorer/ +- https://github.com/elgeish/squad/tree/master/data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_large_v2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_large_v2_en.md new file mode 100644 index 00000000000000..0d62800a8ea89b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_large_v2_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English AlbertForQuestionAnswering Large model (from elgeish) +author: John Snow Labs +name: albert_qa_cs224n_squad2.0_large_v2 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cs224n-squad2.0-albert-large-v2` is a English model originally trained by `elgeish`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_cs224n_squad2.0_large_v2_en_5.1.2_3.0_1695097676421.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_cs224n_squad2.0_large_v2_en_5.1.2_3.0_1695097676421.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_cs224n_squad2.0_large_v2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_cs224n_squad2.0_large_v2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.large_v2").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_cs224n_squad2.0_large_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|62.7 MB| + +## References + +References + +- https://huggingface.co/elgeish/cs224n-squad2.0-albert-large-v2 +- http://web.stanford.edu/class/cs224n/project/default-final-project-handout.pdf +- https://rajpurkar.github.io/SQuAD-explorer/ +- https://github.com/elgeish/squad/tree/master/data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_xxlarge_v1_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_xxlarge_v1_en.md new file mode 100644 index 00000000000000..4551ac8f6ac268 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_cs224n_squad2.0_xxlarge_v1_en.md @@ -0,0 +1,101 @@ +--- +layout: model +title: English AlbertForQuestionAnswering XXLarge model (from elgeish) +author: John Snow Labs +name: albert_qa_cs224n_squad2.0_xxlarge_v1 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `cs224n-squad2.0-albert-xxlarge-v1` is a English model originally trained by `elgeish`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_cs224n_squad2.0_xxlarge_v1_en_5.1.2_3.0_1695097965207.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_cs224n_squad2.0_xxlarge_v1_en_5.1.2_3.0_1695097965207.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_cs224n_squad2.0_xxlarge_v1","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_cs224n_squad2.0_xxlarge_v1","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.xxl.by_elgeish").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_cs224n_squad2.0_xxlarge_v1| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.7 MB| + +## References + +References + +- https://huggingface.co/elgeish/cs224n-squad2.0-albert-xxlarge-v1 +- http://web.stanford.edu/class/cs224n/project/default-final-project-handout.pdf +- https://rajpurkar.github.io/SQuAD-explorer/ +- https://github.com/elgeish/squad/tree/master/data \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_nlpunibo_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_nlpunibo_en.md new file mode 100644 index 00000000000000..98c6b1e9e230e3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_nlpunibo_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from nlpunibo) +author: John Snow Labs +name: albert_qa_nlpunibo +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert` is a English model originally trained by `nlpunibo`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_nlpunibo_en_5.1.2_3.0_1695098552119.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_nlpunibo_en_5.1.2_3.0_1695098552119.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_nlpunibo","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_nlpunibo","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.albert.by_nlpunibo").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_nlpunibo| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +- https://huggingface.co/nlpunibo/albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_slp_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_slp_en.md new file mode 100644 index 00000000000000..e6340e88813afe --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_slp_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from rowan1224) +author: John Snow Labs +name: albert_qa_slp +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-slp` is an English model originally trained by `rowan1224`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_slp_en_5.1.2_3.0_1695096583068.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_slp_en_5.1.2_3.0_1695096583068.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_slp","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_slp","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.albert.by_rowan1224").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_slp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +- https://huggingface.co/rowan1224/albert-slp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_squad_slp_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_squad_slp_en.md new file mode 100644 index 00000000000000..166f7655453766 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_squad_slp_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from rowan1224) Squad +author: John Snow Labs +name: albert_qa_squad_slp +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-squad-slp` is an English model originally trained by `rowan1224`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_squad_slp_en_5.1.2_3.0_1695096478522.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_squad_slp_en_5.1.2_3.0_1695096478522.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_squad_slp","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_squad_slp","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squad.albert.by_rowan1224").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_squad_slp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +- https://huggingface.co/rowan1224/albert-squad-slp \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xlarge_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xlarge_finetuned_en.md new file mode 100644 index 00000000000000..7184538489c9ad --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xlarge_finetuned_en.md @@ -0,0 +1,99 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from 123tarunanand) +author: John Snow Labs +name: albert_qa_xlarge_finetuned +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-xlarge-finetuned` is an English model originally trained by `123tarunanand`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xlarge_finetuned_en_5.1.2_3.0_1695097465378.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xlarge_finetuned_en_5.1.2_3.0_1695097465378.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xlarge_finetuned","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xlarge_finetuned","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.albert.xl").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xlarge_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|204.7 MB| + +## References + +References + +- https://huggingface.co/123tarunanand/albert-xlarge-finetuned +- https://rajpurkar.github.io/SQuAD-explorer/ \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xlarge_v2_squad_v2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xlarge_v2_squad_v2_en.md new file mode 100644 index 00000000000000..9d930ff6604448 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xlarge_v2_squad_v2_en.md @@ -0,0 +1,99 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from ktrapeznikov) +author: John Snow Labs +name: albert_qa_xlarge_v2_squad_v2 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-xlarge-v2-squad-v2` is an English model originally trained by `ktrapeznikov`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xlarge_v2_squad_v2_en_5.1.2_3.0_1695098142567.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xlarge_v2_squad_v2_en_5.1.2_3.0_1695098142567.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xlarge_v2_squad_v2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xlarge_v2_squad_v2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.xl_v2").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xlarge_v2_squad_v2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|204.7 MB| + +## References + +References + +- https://huggingface.co/ktrapeznikov/albert-xlarge-v2-squad-v2 +- https://rajpurkar.github.io/SQuAD-explorer/ \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_tweetqa_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_tweetqa_en.md new file mode 100644 index 00000000000000..a4586ff1274957 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_tweetqa_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from saburbutt) TweetQA +author: John Snow Labs +name: albert_qa_xxlarge_tweetqa +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xxlarge_tweetqa` is an English model originally trained by `saburbutt`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_tweetqa_en_5.1.2_3.0_1695099069331.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_tweetqa_en_5.1.2_3.0_1695099069331.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_tweetqa","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_tweetqa","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.trivia.albert.xxl").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xxlarge_tweetqa| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.0 MB| + +## References + +References + +- https://huggingface.co/saburbutt/albert_xxlarge_tweetqa \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v1_finetuned_squad2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v1_finetuned_squad2_en.md new file mode 100644 index 00000000000000..47363f974f87cc --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v1_finetuned_squad2_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from replydotai) +author: John Snow Labs +name: albert_qa_xxlarge_v1_finetuned_squad2 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-xxlarge-v1-finetuned-squad2` is an English model originally trained by `replydotai`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_v1_finetuned_squad2_en_5.1.2_3.0_1695098816257.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_v1_finetuned_squad2_en_5.1.2_3.0_1695098816257.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_v1_finetuned_squad2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_v1_finetuned_squad2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.xxl").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xxlarge_v1_finetuned_squad2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.7 MB| + +## References + +References + +- https://huggingface.co/replydotai/albert-xxlarge-v1-finetuned-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v2_squad2_covid_deepset_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v2_squad2_covid_deepset_en.md new file mode 100644 index 00000000000000..9f17fa07455d41 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v2_squad2_covid_deepset_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from armageddon) +author: John Snow Labs +name: albert_qa_xxlarge_v2_squad2_covid_deepset +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-xxlarge-v2-squad2-covid-qa-deepset` is an English model originally trained by `armageddon`. 
+ +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_v2_squad2_covid_deepset_en_5.1.2_3.0_1695097330861.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_v2_squad2_covid_deepset_en_5.1.2_3.0_1695097330861.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_v2_squad2_covid_deepset","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_v2_squad2_covid_deepset","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2_covid.albert.xxl_v2").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xxlarge_v2_squad2_covid_deepset| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.0 MB| + +## References + +References + +- https://huggingface.co/armageddon/albert-xxlarge-v2-squad2-covid-qa-deepset \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v2_squad2_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v2_squad2_en.md new file mode 100644 index 00000000000000..95a5a7d77c667c --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlarge_v2_squad2_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from mfeb) +author: John Snow Labs +name: albert_qa_xxlarge_v2_squad2 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert-xxlarge-v2-squad2` is an English model originally trained by `mfeb`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_v2_squad2_en_5.1.2_3.0_1695098470262.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xxlarge_v2_squad2_en_5.1.2_3.0_1695098470262.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_v2_squad2","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlarge_v2_squad2","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.xxl_v2").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xxlarge_v2_squad2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.0 MB| + +## References + +References + +- https://huggingface.co/mfeb/albert-xxlarge-v2-squad2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlargev1_squad2_512_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlargev1_squad2_512_en.md new file mode 100644 index 00000000000000..fb4aafad120fee --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_qa_xxlargev1_squad2_512_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from ahotrod) +author: John Snow Labs +name: albert_qa_xxlargev1_squad2_512 +date: 2023-09-19 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xxlargev1_squad2_512` is an English model originally trained by `ahotrod`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_xxlargev1_squad2_512_en_5.1.2_3.0_1695097007309.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_xxlargev1_squad2_512_en_5.1.2_3.0_1695097007309.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlargev1_squad2_512","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_xxlargev1_squad2_512","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.squadv2.albert.xxl_512d").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_xxlargev1_squad2_512| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.7 MB| + +## References + +References + +- https://huggingface.co/ahotrod/albert_xxlargev1_squad2_512 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_v2_base_finetuned_recipeqa_modified_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_v2_base_finetuned_recipeqa_modified_en.md new file mode 100644 index 00000000000000..89e1963a6e38e5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_v2_base_finetuned_recipeqa_modified_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_v2_base_finetuned_recipeqa_modified AlbertForQuestionAnswering from tamhuynh27 +author: John Snow Labs +name: albert_v2_base_finetuned_recipeqa_modified +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_v2_base_finetuned_recipeqa_modified` is an English model originally trained by tamhuynh27.
+ +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_v2_base_finetuned_recipeqa_modified_en_5.1.2_3.0_1695096628837.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_v2_base_finetuned_recipeqa_modified_en_5.1.2_3.0_1695096628837.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_v2_base_finetuned_recipeqa_modified","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_v2_base_finetuned_recipeqa_modified", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_v2_base_finetuned_recipeqa_modified| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/tamhuynh27/albert-v2-base-finetuned-recipeqa-modified \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_xl_v2_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_xl_v2_finetuned_squad_en.md new file mode 100644 index 00000000000000..75feb480116802 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_xl_v2_finetuned_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xl_v2_finetuned_squad AlbertForQuestionAnswering from anas-awadalla +author: John Snow Labs +name: albert_xl_v2_finetuned_squad +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xl_v2_finetuned_squad` is an English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xl_v2_finetuned_squad_en_5.1.2_3.0_1695097658112.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xl_v2_finetuned_squad_en_5.1.2_3.0_1695097658112.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_xl_v2_finetuned_squad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_xl_v2_finetuned_squad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xl_v2_finetuned_squad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|204.7 MB| + +## References + +https://huggingface.co/anas-awadalla/albert-xl-v2-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_xlarge_squad_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_xlarge_squad_finetuned_en.md new file mode 100644 index 00000000000000..cfa74cae3399f3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_xlarge_squad_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xlarge_squad_finetuned AlbertForQuestionAnswering from rahulchakwate +author: John Snow Labs +name: albert_xlarge_squad_finetuned +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xlarge_squad_finetuned` is an English model originally trained by rahulchakwate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xlarge_squad_finetuned_en_5.1.2_3.0_1695096239337.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xlarge_squad_finetuned_en_5.1.2_3.0_1695096239337.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_xlarge_squad_finetuned","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_xlarge_squad_finetuned", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xlarge_squad_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|204.7 MB| + +## References + +https://huggingface.co/rahulchakwate/albert-xlarge-squad-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_xxl_v2_finetuned_squad_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_xxl_v2_finetuned_squad_en.md new file mode 100644 index 00000000000000..60fc5639835bf5 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_xxl_v2_finetuned_squad_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xxl_v2_finetuned_squad AlbertForQuestionAnswering from anas-awadalla +author: John Snow Labs +name: albert_xxl_v2_finetuned_squad +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xxl_v2_finetuned_squad` is an English model originally trained by anas-awadalla. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xxl_v2_finetuned_squad_en_5.1.2_3.0_1695097862908.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xxl_v2_finetuned_squad_en_5.1.2_3.0_1695097862908.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_xxl_v2_finetuned_squad","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_xxl_v2_finetuned_squad", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xxl_v2_finetuned_squad| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.0 MB| + +## References + +https://huggingface.co/anas-awadalla/albert-xxl-v2-finetuned-squad \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-albert_xxlarge_squad_finetuned_en.md b/docs/_posts/ahmedlone127/2023-09-19-albert_xxlarge_squad_finetuned_en.md new file mode 100644 index 00000000000000..cda270324178f8 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-albert_xxlarge_squad_finetuned_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_xxlarge_squad_finetuned AlbertForQuestionAnswering from rahulchakwate +author: John Snow Labs +name: albert_xxlarge_squad_finetuned +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_xxlarge_squad_finetuned` is an English model originally trained by rahulchakwate. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_xxlarge_squad_finetuned_en_5.1.2_3.0_1695096458329.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_xxlarge_squad_finetuned_en_5.1.2_3.0_1695096458329.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_xxlarge_squad_finetuned","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_xxlarge_squad_finetuned", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_xxlarge_squad_finetuned| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.7 MB| + +## References + +https://huggingface.co/rahulchakwate/albert-xxlarge-squad-finetuned \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-lodosalberttr_en.md b/docs/_posts/ahmedlone127/2023-09-19-lodosalberttr_en.md new file mode 100644 index 00000000000000..cc0cfa143bb41e --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-lodosalberttr_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English lodosalberttr AlbertForTokenClassification from bgk +author: John Snow Labs +name: lodosalberttr +date: 2023-09-19 +tags: [albert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `lodosalberttr` is an English model originally trained by bgk. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/lodosalberttr_en_5.1.2_3.0_1695087490256.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/lodosalberttr_en_5.1.2_3.0_1695087490256.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("lodosalberttr","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("lodosalberttr", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|lodosalberttr| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|42.9 MB| + +## References + +https://huggingface.co/bgk/lodosalberttr \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-m_albert_qa_model_en.md b/docs/_posts/ahmedlone127/2023-09-19-m_albert_qa_model_en.md new file mode 100644 index 00000000000000..47b6835e0c85d3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-m_albert_qa_model_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English m_albert_qa_model AlbertForQuestionAnswering from Chetna19 +author: John Snow Labs +name: m_albert_qa_model +date: 2023-09-19 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `m_albert_qa_model` is an English model originally trained by Chetna19. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/m_albert_qa_model_en_5.1.2_3.0_1695096857164.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/m_albert_qa_model_en_5.1.2_3.0_1695096857164.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("m_albert_qa_model","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("m_albert_qa_model", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|m_albert_qa_model| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/Chetna19/m_albert_qa_model \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-19-tiny_albert_en.md b/docs/_posts/ahmedlone127/2023-09-19-tiny_albert_en.md new file mode 100644 index 00000000000000..2fbf97a169407b --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-19-tiny_albert_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English tiny_albert AlbertForTokenClassification from vumichien +author: John Snow Labs +name: tiny_albert +date: 2023-09-19 +tags: [albert, en, open_source, token_classification, onnx] +task: Named Entity Recognition +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForTokenClassification +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForTokenClassification model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `tiny_albert` is an English model originally trained by vumichien. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/tiny_albert_en_5.1.2_3.0_1695087950750.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/tiny_albert_en_5.1.2_3.0_1695087950750.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("documents") + + +sequenceClassifier = AlbertForTokenClassification.pretrained("tiny_albert","en") \ + .setInputCols(["documents","token"]) \ + .setOutputCol("ner") + +pipeline = Pipeline().setStages([document_assembler, sequenceClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("documents") + +val sequenceClassifier = AlbertForTokenClassification + .pretrained("tiny_albert", "en") + .setInputCols(Array("documents","token")) + .setOutputCol("ner") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sequenceClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|tiny_albert| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[documents, token]| +|Output Labels:|[ner]| +|Language:|en| +|Size:|1.0 MB| + +## References + +https://huggingface.co/vumichien/tiny-albert \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-20-albert_qa_ai_club_inductions_21_nlp_en.md b/docs/_posts/ahmedlone127/2023-09-20-albert_qa_ai_club_inductions_21_nlp_en.md new file mode 100644 index 00000000000000..bbe2774b3c3c79 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-20-albert_qa_ai_club_inductions_21_nlp_en.md @@ -0,0 +1,98 @@ +--- +layout: model +title: English AlbertForQuestionAnswering model (from AyushPJ) +author: John Snow Labs +name: albert_qa_ai_club_inductions_21_nlp +date: 2023-09-20 +tags: [en, open_source, albert, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained Question Answering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `ai-club-inductions-21-nlp-ALBERT` is an English model originally trained by `AyushPJ`. + +## Predicted Entities + + + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_ai_club_inductions_21_nlp_en_5.1.2_3.0_1695188557360.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_ai_club_inductions_21_nlp_en_5.1.2_3.0_1695188557360.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python +documentAssembler = MultiDocumentAssembler() \ +.setInputCols(["question", "context"]) \ +.setOutputCols(["document_question", "document_context"]) + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_ai_club_inductions_21_nlp","en") \ +.setInputCols(["document_question", "document_context"]) \ +.setOutputCol("answer")\ +.setCaseSensitive(True) + +pipeline = Pipeline(stages=[documentAssembler, spanClassifier]) + +data = spark.createDataFrame([["What is my name?", "My name is Clara and I live in Berkeley."]]).toDF("question", "context") + +result = pipeline.fit(data).transform(data) +``` +```scala +val documentAssembler = new MultiDocumentAssembler() +.setInputCols(Array("question", "context")) +.setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_ai_club_inductions_21_nlp","en") +.setInputCols(Array("document_question", "document_context")) +.setOutputCol("answer") +.setCaseSensitive(true) + +val pipeline = new Pipeline().setStages(Array(documentAssembler, spanClassifier)) + +val data = Seq(("What is my name?", "My name is Clara and I live in Berkeley.")).toDF("question", "context") + +val result = pipeline.fit(data).transform(data) +``` + +{:.nlu-block} +```python +import nlu +nlu.load("en.answer_question.albert.by_AyushPJ").predict("""What is my name?|||My name is Clara and I live in Berkeley.""") +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_ai_club_inductions_21_nlp| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +References + +- https://huggingface.co/AyushPJ/ai-club-inductions-21-nlp-ALBERT \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-20-albert_qa_biom_xxlarge_squad2_en.md b/docs/_posts/ahmedlone127/2023-09-20-albert_qa_biom_xxlarge_squad2_en.md new file mode 100644 index 00000000000000..b59b0ed635bcc3 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-20-albert_qa_biom_xxlarge_squad2_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_qa_biom_xxlarge_squad2 AlbertForQuestionAnswering from sultan +author: John Snow Labs +name: albert_qa_biom_xxlarge_squad2 +date: 2023-09-20 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_qa_biom_xxlarge_squad2` is an English model originally trained by sultan. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_biom_xxlarge_squad2_en_5.1.2_3.0_1695188434641.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_biom_xxlarge_squad2_en_5.1.2_3.0_1695188434641.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_biom_xxlarge_squad2","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_qa_biom_xxlarge_squad2", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_biom_xxlarge_squad2| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|771.3 MB| + +## References + +https://huggingface.co/sultan/BioM-ALBERT-xxlarge-SQuAD2 \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-20-albert_qa_qa_1e_en.md b/docs/_posts/ahmedlone127/2023-09-20-albert_qa_qa_1e_en.md new file mode 100644 index 00000000000000..27b603e1afbc75 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-20-albert_qa_qa_1e_en.md @@ -0,0 +1,93 @@ +--- +layout: model +title: English albert_qa_qa_1e AlbertForQuestionAnswering from SalmanMo +author: John Snow Labs +name: albert_qa_qa_1e +date: 2023-09-20 +tags: [albert, en, open_source, question_answering, onnx] +task: Question Answering +language: en +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: onnx +annotator: AlbertForQuestionAnswering +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained AlbertForQuestionAnswering model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `albert_qa_qa_1e` is an English model originally trained by SalmanMo. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/albert_qa_qa_1e_en_5.1.2_3.0_1695188622456.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/albert_qa_qa_1e_en_5.1.2_3.0_1695188622456.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = MultiDocumentAssembler() \ + .setInputCols(["question", "context"]) \ + .setOutputCols(["document_question", "document_context"]) + + +spanClassifier = AlbertForQuestionAnswering.pretrained("albert_qa_qa_1e","en") \ + .setInputCols(["document_question","document_context"]) \ + .setOutputCol("answer") + +pipeline = Pipeline().setStages([document_assembler, spanClassifier]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new MultiDocumentAssembler() + .setInputCols(Array("question", "context")) + .setOutputCols(Array("document_question", "document_context")) + +val spanClassifier = AlbertForQuestionAnswering + .pretrained("albert_qa_qa_1e", "en") + .setInputCols(Array("document_question","document_context")) + .setOutputCol("answer") + +val pipeline = new Pipeline().setStages(Array(document_assembler, spanClassifier)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|albert_qa_qa_1e| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[document_question, document_context]| +|Output Labels:|[answer]| +|Language:|en| +|Size:|42.0 MB| + +## References + +https://huggingface.co/SalmanMo/ALBERT_QA_1e \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_base_xx.md b/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_base_xx.md new file mode 100644 index 00000000000000..fa531cfdfd1f9a --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_base_xx.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Multilingual multilingual_e5_base XlmRoBertaSentenceEmbeddings from intfloat +author: John Snow Labs +name: multilingual_e5_base +date: 2023-09-21 +tags: [xlm_roberta, xx, open_source, tensorflow] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `multilingual_e5_base` is a multilingual model originally trained by intfloat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_e5_base_xx_5.1.2_3.0_1695314944172.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_e5_base_xx_5.1.2_3.0_1695314944172.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentencerDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("multilingual_e5_base","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, sentencerDL, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentencerDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings + .pretrained("multilingual_e5_base", "xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sentencerDL, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_e5_base| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|667.1 MB| + +## References + +https://huggingface.co/intfloat/multilingual-e5-base \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_large_xx.md b/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_large_xx.md new file mode 100644 index 00000000000000..68f15a5f8e283f --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_large_xx.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Multilingual multilingual_e5_large XlmRoBertaSentenceEmbeddings from intfloat +author: John Snow Labs +name: multilingual_e5_large +date: 2023-09-21 +tags: [xlm_roberta, xx, open_source, tensorflow] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `multilingual_e5_large` is a multilingual model originally trained by intfloat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_e5_large_xx_5.1.2_3.0_1695315223158.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_e5_large_xx_5.1.2_3.0_1695315223158.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentencerDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("multilingual_e5_large","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, sentencerDL, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentencerDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings + .pretrained("multilingual_e5_large", "xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sentencerDL, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_e5_large| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|1.3 GB| + +## References + +https://huggingface.co/intfloat/multilingual-e5-large \ No newline at end of file diff --git a/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_small_xx.md b/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_small_xx.md new file mode 100644 index 00000000000000..c2760613db9832 --- /dev/null +++ b/docs/_posts/ahmedlone127/2023-09-21-multilingual_e5_small_xx.md @@ -0,0 +1,100 @@ +--- +layout: model +title: Multilingual multilingual_e5_small XlmRoBertaSentenceEmbeddings from intfloat +author: John Snow Labs +name: multilingual_e5_small +date: 2023-09-21 +tags: [xlm_roberta, xx, open_source, tensorflow] +task: Embeddings +language: xx +edition: Spark NLP 5.1.2 +spark_version: 3.0 +supported: true +engine: tensorflow +annotator: XlmRoBertaSentenceEmbeddings +article_header: + type: cover +use_language_switcher: "Python-Scala-Java" +--- + +## Description + +Pretrained XlmRoBertaSentenceEmbeddings model, adapted from Hugging Face and curated to provide scalability and production-readiness using Spark NLP. `multilingual_e5_small` is a multilingual model originally trained by intfloat. + +{:.btn-box} + + +[Download](https://s3.amazonaws.com/auxdata.johnsnowlabs.com/public/models/multilingual_e5_small_xx_5.1.2_3.0_1695316525385.zip){:.button.button-orange.button-orange-trans.arr.button-icon} +[Copy S3 URI](s3://auxdata.johnsnowlabs.com/public/models/multilingual_e5_small_xx_5.1.2_3.0_1695316525385.zip){:.button.button-orange.button-orange-trans.button-icon.button-copy-s3} + +## How to use + + + +
+{% include programmingLanguageSelectScalaPythonNLU.html %} +```python + + +document_assembler = DocumentAssembler() \ + .setInputCol("text") \ + .setOutputCol("document") + +sentencerDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx")\ + .setInputCols(["document"])\ + .setOutputCol("sentence") + +embeddings = XlmRoBertaSentenceEmbeddings.pretrained("multilingual_e5_small","xx") \ + .setInputCols(["sentence"]) \ + .setOutputCol("embeddings") + +pipeline = Pipeline().setStages([document_assembler, sentencerDL, embeddings]) + +pipelineModel = pipeline.fit(data) + +pipelineDF = pipelineModel.transform(data) + +``` +```scala + + +val document_assembler = new DocumentAssembler() + .setInputCol("text") + .setOutputCol("document") + +val sentencerDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl", "xx") + .setInputCols(Array("document")) + .setOutputCol("sentence") + +val embeddings = XlmRoBertaSentenceEmbeddings + .pretrained("multilingual_e5_small", "xx") + .setInputCols(Array("sentence")) + .setOutputCol("embeddings") + +val pipeline = new Pipeline().setStages(Array(document_assembler, sentencerDL, embeddings)) + +val pipelineModel = pipeline.fit(data) + +val pipelineDF = pipelineModel.transform(data) + + +``` +
+ +{:.model-param} +## Model Information + +{:.table-model} +|---|---| +|Model Name:|multilingual_e5_small| +|Compatibility:|Spark NLP 5.1.2+| +|License:|Open Source| +|Edition:|Official| +|Input Labels:|[sentence]| +|Output Labels:|[embeddings]| +|Language:|xx| +|Size:|284.4 MB| + +## References + +https://huggingface.co/intfloat/multilingual-e5-small \ No newline at end of file